Subsequently, I perform some of the steps of the Seurat analysis as
described in the documentation for Seurat project (https://satijalab.org/seurat/articles/integration_introduction).
The remaining steps are completed after merging the samples.
Sample 1
#Quality control
seurat_s1[["percent.mt"]] <- PercentageFeatureSet(seurat_s1, pattern = "^mt-")
#View(seurat_s1@meta.data)
VlnPlot(seurat_s1, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s1, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s1, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s1, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

seurat_s1.filtered <- subset(seurat_s1, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)
FeatureScatter(seurat_s1.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s1.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s1.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

#Normalization
s1_normalized <- NormalizeData(object = seurat_s1.filtered)
## Normalizing layer: counts
#Integration
s1_integrated <- FindVariableFeatures(object = s1_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s1_integrated), 10)
top10
## [1] "Hspa1b" "Gm11290" "Hspa1a" "Ly6c2" "Klra5" "Il7r" "Plac8"
## [8] "Stmn1" "Cxcl10" "Gzmb"
plot1 <- VariableFeaturePlot(s1_integrated)
plot1
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

#Scaling
s1_scaled <- ScaleData(object = s1_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s1_reduced <- RunPCA(object = s1_scaled)
## PC_ 1
## Positive: Gzma, Prf1, Gzmb, Ccl5, Irf8, Ccl3, Ccl4, Itgam, Cma1, Lgals1
## Actb, Plek, Klrg1, Ncr1, Klra9, Cdc20b, Sgk1, Serpinb9, Serpinb9b, ENSMUSG00000121360
## Ccnd2, H1f2, Eomes, Klri2, Nr4a1, Klrb1a, Zeb2, Cx3cr1, Klra4, Klra8
## Negative: Emb, Ly6e, Il7r, Tcf7, Cd3e, Gpr183, Ikzf2, Trgc4, Cd3g, Ltb
## Inpp4b, Cd7, Cd28, Camk4, Rora, Thy1, Cxcr6, Gramd3, Slco3a1, Jaml
## Lyst, Cd3d, Itgb3, Ly6a, Trgc2, Ssh2, Zfp36l1, Plac8, Actn1, Tmem176b
## PC_ 2
## Positive: Dhrs3, Eomes, Stat3, Bcl2l11, Vegfa, Pmepa1, Ccr2, Tgfb1, Car2, Myb
## Cemip2, Rnf157, Cxcr4, Tspan13, Skil, Ctla2a, Dgat1, Malt1, Fosl2, Irf8
## Birc3, Vps37b, Irak2, Plxnc1, Gpr132, Bach2, Cux1, Crem, Gzma, Sult2b1
## Negative: Cd3e, Cd3g, Ly6c2, S100a6, Actb, Cd3d, Trgc2, Cd226, Cxcr6, Trgc4
## Il7r, Jaml, Trgc1, Hopx, Slfn1, Ly6a, Cd160, S100a4, Ncr1, Inpp4b
## Slco3a1, Bcl11b, Trgv2, Actn1, Trbc2, Dpp4, Trac, Ccnd2, Trgv1, Cd8a
## PC_ 3
## Positive: Pim1, Nr4a1, Nfkbiz, Nfkbia, Serpinb9, Nr4a3, Bhlhe40, Kdm6b, Ifrd1, Nfkbid
## Vps37b, Icam1, Vegfa, Fosl2, Dusp5, Gadd45b, Nabp1, Spry2, Irf8, Cdkn1a
## Nfkbie, Phlda1, Birc3, Crem, Mxd1, Nr4a2, Tnfrsf9, Serpinb6b, Rel, Gem
## Negative: Fos, Jun, Hspa1b, Klf2, Fosb, Hspa1a, Egr1, Dnajb1, Arl4d, Klra4
## Cited4, Pmepa1, Psap, Gm12185, S1pr1, 9930111J21Rik2, Ccr2, ENSMUSG00000121360, Ctla2a, Tcf7
## Il18r1, Klf6, Klra8, ENSMUSG00000120357, Rhob, Ifi208, Myb, Eomes, Trdc, Ifi213
## PC_ 4
## Positive: Cd3e, Cd3g, Cd3d, Trgc2, Gramd3, Trgc1, Nr4a1, Dpp4, Ikzf2, Trac
## Vps37b, Actn1, Il7r, Nfkbiz, Trgv2, Trbc2, Nfkbid, Nsg2, Nfkbia, Itk
## Kdm6b, Trgc4, Bcl11b, Dusp5, Cd28, Spry2, Nr4a3, Cd5, Thy1, Trgv1
## Negative: Ifitm3, H2-Aa, Cd74, H2-Ab1, Lyz2, Ccl6, App, Apoe, Mmp14, Clec7a
## Csf1r, Spi1, Myof, Mpeg1, Ly6i, ENSMUSG00000120878, Alox5ap, Lst1, H2-Eb1, Nlrp3
## Ncf2, Ifi211, H2-DMb1, Mafb, Adgre1, Cd68, Tlr13, Il1b, Ms4a8a, Pla2g4a
## PC_ 5
## Positive: Cd3e, Cd3g, Cd3d, Trac, Trgc2, Slfn1, Cxcr4, Dpp4, Trgc1, Pmepa1
## Kdm6b, Ass1, Vegfa, Fth1, Pim1, Nsg2, Cd5, Fosl2, Crem, Trgv2
## Nfkbid, Gramd3, Vps37b, Klra6, Cdkn1a, Nr4a2, Rgs16, S1pr1, Ifi27l2a, A330040F15Rik
## Negative: Tmem176b, Gm36723, Ncr1, Ckb, Klrb1b, Tmem176a, Fgl2, Dscam, Rora, S100a4
## Krt83, Peak1, Adgrg3, Slamf7, Coro2a, Cxcr6, Klrc1, Sema6d, AU020206, Id2
## St6galnac3, Cd226, Asb2, Fosb, Gm2682, Etv6, Itgax, Fos, Il18r1, Ripor2
VizDimLoadings(s1_reduced, dims = 1:2, reduction = "pca")

DimPlot(s1_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s1_reduced, dims = 1:20, cells = 500, balanced = TRUE)

ElbowPlot(s1_reduced)

#Clustering
s1_neighboors <- FindNeighbors(object = s1_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s1_clusters <- FindClusters(object = s1_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1660
## Number of edges: 64144
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6864
## Number of communities: 7
## Elapsed time: 0 seconds
head(Idents(s1_clusters), 5) #cluster IDs 1st 5 cells
## 14310 38613 44195 93453 157761
## 1 0 0 3 0
## Levels: 0 1 2 3 4 5 6
#UMAP
s1_umap <- RunUMAP(object = s1_clusters, dims = 1:20)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 00:28:53 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:28:53 Read 1660 rows and found 20 numeric columns
## 00:28:53 Using Annoy for neighbor search, n_neighbors = 30
## 00:28:53 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:28:53 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1617236c471
## 00:28:53 Searching Annoy index using 1 thread, search_k = 3000
## 00:28:53 Annoy recall = 100%
## 00:28:53 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:28:54 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:28:54 Commencing optimization for 500 epochs, with 64836 positive edges
## 00:28:55 Optimization finished
DimPlot(s1_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s1 <- paramSweep(s1_umap, PCs = 1:20, sct = FALSE)
## Loading required package: fields
## Loading required package: spam
## Spam version 2.11-1 (2025-01-20) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
##
## Attaching package: 'spam'
##
## The following object is masked from 'package:stats4':
##
## mle
##
## The following objects are masked from 'package:base':
##
## backsolve, forwardsolve
##
## Loading required package: viridisLite
##
## Try help(fields) to get started.
## Loading required package: parallel
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.01..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s1 <- summarizeSweep(sweep.res.list_s1, GT = FALSE)
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded
## Copyright M. P. Wand 1997-2009
## Loading required package: ROCR
bcmvn_s1 <- find.pK(sweep.stats_s1)

## NULL
ggplot(bcmvn_s1, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.29 max
pK <- bcmvn_s1[bcmvn_s1$BCmetric == max(bcmvn_s1$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.29
#Homotypic doublet proportion estimate
annotations <- s1_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s1_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 67
#DoubletFinder
s1_finalized <- doubletFinder(s1_umap,
PCs = 1:20,
pK = pK,
nExp = nExp_poi.adj,
reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 553 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s1_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.29_67")

table(s1_finalized@meta.data$DF.classifications_0.25_0.29_67)
##
## Doublet Singlet
## 67 1593
s1_finalized <- subset(s1_finalized, subset = DF.classifications_0.25_0.29_67 == "Singlet")
table(s1_finalized@meta.data$DF.classifications_0.25_0.29_67)
##
## Singlet
## 1593
DimPlot(s1_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.29_67")

Sample 2
#Quality control
seurat_s2[["percent.mt"]] <- PercentageFeatureSet(seurat_s2, pattern = "^mt-")
#View(seurat_s2@meta.data)
VlnPlot(seurat_s2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s2, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s2, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s2, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

seurat_s2.filtered <- subset(seurat_s2, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)
FeatureScatter(seurat_s2.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s2.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s2.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

#Normalization
s2_normalized <- NormalizeData(object = seurat_s2.filtered)
## Normalizing layer: counts
#Integration
s2_integrated <- FindVariableFeatures(object = s2_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s2_integrated), 10)
top10
## [1] "Hspa1b" "Trac" "Gm11290" "Il7r" "Klra5" "Plac8" "Gzmb"
## [8] "Hspa1a" "Ccl4" "Ccl3"
plot1 <- VariableFeaturePlot(s2_integrated)
plot1
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

#Scaling
s2_scaled <- ScaleData(object = s2_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s2_reduced <- RunPCA(object = s2_scaled)
## PC_ 1
## Positive: Emb, Il7r, Camk4, Ly6e, Gpr183, Tcf7, Cd3e, Inpp4b, Itgb3, Gramd3
## Cxcr6, Lyst, Ikzf2, Ltb, Gna13, Cd3g, Rora, Gpr132, Cd3d, Cd28
## Xcl1, Zfp36l1, Trac, Odc1, Plac8, Smad7, Lilrb4b, Trgc4, Zc3hav1, Ly6a
## Negative: Gzma, Prf1, Ccl5, Gzmb, Ccl3, Ccl4, Irf8, Itgam, Lgals1, Cma1
## Actb, S1pr5, Plek, Klrg1, Klra9, Ncr1, Zeb2, Cdc20b, Sgk1, Klf2
## ENSMUSG00000121360, Klri2, H1f2, Itga2, Ccnd2, Klra4, Serpinb9b, Rap1gap2, Ifng, Eomes
## PC_ 2
## Positive: Eomes, Dhrs3, Myb, Stat3, Ctla2a, Slc26a11, Satb1, Plxnc1, Pmepa1, Ccr2
## Rnf157, Cemip2, Tgfb1, Cenpa, Tpd52, Klra4, Sft2d2, Bach2, Tcf7, Slc3a2
## Car2, Vegfa, Gstm1, Ssh2, Smad7, Klra8, Cxcr4, Ust, Mideas, Sult2b1
## Negative: S100a6, S100a4, Cd52, Actb, Ly6a, Cd226, Inpp4b, Cxcr6, Ly6c2, Adam8
## Cd74, Ifitm2, Ncr1, Il7r, Lgals1, Ccnd2, Cd3e, Vim, Fgl2, Clec7a
## Asb2, Cd3g, Tmem176b, AA467197, Trgc4, H2-Ab1, Lyz2, Capg, Cxcl2, Sgms1
## PC_ 3
## Positive: Clec4e, Lyz2, Tgfbi, Tnfaip2, Nlrp3, Cxcl2, Il1b, Clec4d, Clec4n, Clec7a
## Csf2rb, Ptgs2, Cd74, Pirb, Vegfa, Lilra6, C5ar1, Ptafr, Inhba, Basp1
## Pstpip2, Spi1, AA467197, Csf2ra, Hcar2, Ifitm2, Cd300a, Nfam1, Ccr1, Haao
## Negative: Cd3e, Cd3g, Cd3d, Trac, Trgc2, Trgc1, Bcl11b, Trgv2, Trbc2, Dpp4
## Trgc4, Actn1, Trgv1, Nsg2, Ikzf2, Il7r, Cd5, Cd8a, Inpp4b, Trat1
## Dtx1, Lat, Cdh1, Cd226, Cxcr6, P2rx7, Grap2, Cd160, Adam19, Tnfaip8
## PC_ 4
## Positive: Cd3e, Cd3d, Cd3g, Pmepa1, Trgc2, Dpp4, Nsg2, Trac, Fth1, Trgc1
## Clec4e, S1pr1, Dtx4, Nt5e, Atp1a3, Clec7a, Clk2, Trgv2, Lyz2, ENSMUSG00000121069
## Pygl, Clec4d, Clec4n, Csf2rb, Actn1, Cxcl2, Il1b, Klra6, Rgs10, Pstpip2
## Negative: Serpinb9, Spry2, Nabp1, Il12rb2, Dusp5, Irf8, Nr4a1, Ncr1, Dennd4a, Serpinb6b
## Plek, Nfkbiz, Prf1, Nr4a3, Ccnd2, Gm36723, Tmem176a, P2ry10, Pim1, Nfkbia
## Fgl2, Actb, Vegfa, Cd52, Ifrd1, Lilrb4b, Tmem176b, Mxd1, Nr4a2, Arsb
## PC_ 5
## Positive: Tmem176a, Tmem176b, Klrb1b, Ckb, Vegfa, Krt83, Prnp, Il2ra, Rora, ENSMUSG00000121239
## Cxcr6, Dscam, Dgat1, Acpp, Gm36723, Fth1, Adgrg3, Hoxa5, Trgc4, Ltb
## Itgb3, Furin, Ccdc184, Septin8, Sdc4, Il7r, Scn1b, Nrgn, Hilpda, Serpina3g
## Negative: Utrn, Fos, Ifi208, Ifit1, Fosb, Rtp4, Ifit2, Ifit3, Rsad2, Ifi206
## Mx1, Cblb, Ifi209, Cd3e, Klf6, Slfn8, Ifit1bl1, Cd69, Ripor2, Haao
## Klra7, Klra6, Stat1, Phf21a, Samhd1, Ifi213, Elmo1, Isg15, Cmpk2, Cmah
VizDimLoadings(s2_reduced, dims = 1:2, reduction = "pca")

DimPlot(s2_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s2_reduced, dims = 1:20, cells = 500, balanced = TRUE)

ElbowPlot(s2_reduced)

#Clustering
s2_neighboors <- FindNeighbors(object = s2_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s2_clusters <- FindClusters(object = s2_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 916
## Number of edges: 37290
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6433
## Number of communities: 5
## Elapsed time: 0 seconds
head(Idents(s2_clusters), 5) #cluster IDs 1st 5 cells
## 176454 252066 285663 298134 298967
## 1 0 2 0 1
## Levels: 0 1 2 3 4
#UMAP
s2_umap <- RunUMAP(object = s2_clusters, dims = 1:20)
## 00:29:18 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:29:18 Read 916 rows and found 20 numeric columns
## 00:29:18 Using Annoy for neighbor search, n_neighbors = 30
## 00:29:18 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:29:18 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1613f7c67a6
## 00:29:18 Searching Annoy index using 1 thread, search_k = 3000
## 00:29:18 Annoy recall = 100%
## 00:29:19 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:29:19 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:29:19 Commencing optimization for 500 epochs, with 35074 positive edges
## 00:29:20 Optimization finished
DimPlot(s2_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s2 <- paramSweep(s2_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s2 <- summarizeSweep(sweep.res.list_s2, GT = FALSE)
bcmvn_s2 <- find.pK(sweep.stats_s2)

## NULL
ggplot(bcmvn_s2, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.11 max
pK <- bcmvn_s2[bcmvn_s2$BCmetric == max(bcmvn_s2$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.11
#Homotypic doublet proportion estimate
annotations <- s2_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s2_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 34
#DoubletFinder
s2_finalized <- doubletFinder(s2_umap,
PCs = 1:20,
pK = pK,
nExp = nExp_poi.adj,
reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 305 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s2_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.11_34")

table(s2_finalized@meta.data$DF.classifications_0.25_0.11_34)
##
## Doublet Singlet
## 34 882
s2_finalized <- subset(s2_finalized, subset = DF.classifications_0.25_0.11_34 == "Singlet")
table(s2_finalized@meta.data$DF.classifications_0.25_0.11_34)
##
## Singlet
## 882
DimPlot(s2_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.11_34")

Sample 3
#Quality control
seurat_s3[["percent.mt"]] <- PercentageFeatureSet(seurat_s3, pattern = "^mt-")
#View(seurat_s3@meta.data)
VlnPlot(seurat_s3, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s3, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s3, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s3, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

seurat_s3.filtered <- subset(seurat_s3, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)
FeatureScatter(seurat_s3.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s3.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s3.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

#Normalization
s3_normalized <- NormalizeData(object = seurat_s3.filtered)
## Normalizing layer: counts
#Integration
s3_integrated <- FindVariableFeatures(object = s3_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s3_integrated), 10)
top10
## [1] "Hspa1b" "Gm11290" "Il7r" "Klra5" "Ccl3" "Ccl9"
## [7] "Ly6c2" "Hspa1a" "Serpinb9b" "Gzmb"
plot1 <- VariableFeaturePlot(s3_integrated)
plot1

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2

#Scaling
s3_scaled <- ScaleData(object = s3_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s3_reduced <- RunPCA(object = s3_scaled)
## PC_ 1
## Positive: Emb, Il7r, Ly6e, Tcf7, Gpr183, Cd3e, Trgc4, Camk4, Ikzf2, Gpr132
## Itgb3, Lyst, Zfp36l1, Inpp4b, Gna13, Plac8, Cd3d, Trbc2, Gramd3, Cd28
## Peli1, Tmem176a, Icos, Cd3g, Irak2, Ltb, Rora, Retreg1, Smad7, Cxcr6
## Negative: Gzma, Prf1, Gzmb, Ccl3, Ccl4, Ccl5, Irf8, Itgam, Lgals1, Cma1
## Actb, Plek, Itgb2, S1pr5, Klrg1, Ncr1, Klra9, Cdc20b, Serpinb9, Sgk1
## Ccnd2, H1f2, Klra8, Zeb2, Dusp2, Serpinb9b, Klrb1a, Klf2, Il12rb2, ENSMUSG00000121360
## PC_ 2
## Positive: Satb1, Ctla2a, Pmepa1, Slc26a11, Bcl2l11, Tcf7, Ssh2, Irak2, Cemip2, Myb
## Smad7, Peli1, Fryl, Eomes, Plxnc1, Cblb, Car2, Cxcr4, Tgfb1, Emb
## Sult2b1, Prkch, Tnrc6b, Ipcef1, Mideas, Tpd52, Klra4, S1pr1, Dennd4a, ENSMUSG00000121360
## Negative: Tgm2, Ifitm2, Alox5ap, Cd74, Ifitm3, Gpr35, Gatm, Il1b, Ccr1, Pirb
## App, Sirpa, Mpeg1, Ly6a, Ifi30, Ly86, AA467197, ENSMUSG00000120878, Ms4a6d, Tgfbi
## Plbd1, Basp1, Gm19765, Clec7a, Csf2ra, Arg1, Adam8, Ifi211, Ier3, Ifitm1
## PC_ 3
## Positive: Trgc4, Il7r, Inpp4b, Cd160, Cd3e, Ly6c2, Cd226, Cxcr6, Cd3g, mt-Nd4
## Tmem176b, Ncr1, Cd3d, Hopx, S100a6, Trac, Cd52, Asb2, Tmem176a, Thy1
## Klrc1, Actb, Gpr183, Il2ra, Itgb2, Bcl11b, Trgc1, Camk4, Gpr34, Trgv1
## Negative: Eomes, Tgfb1, Cemip2, Stat3, Pmepa1, Tpd52, Plxnc1, Klra4, Vegfa, Myb
## Bcl2l11, Tspan13, Gzma, Ccr2, Fryl, ENSMUSG00000121360, Ctla2a, Irf8, Fth1, Klra8
## Fosl2, Klri2, Ndrg1, Cux1, Sult2b1, Smad7, Plac8, Cdkn1a, Tiparp, Malt1
## PC_ 4
## Positive: Pmepa1, Fth1, Cd3e, Aldoa, Cd3d, Osgin1, Cd3g, Cd8a, Trac, Rgs10
## Ifi27l2a, Cd6, S1pr1, Plcg1, Tspan13, Aim2, Dzank1, Cd5, Cd8b1, Gm8369
## Trgv2, Ass1, Actn1, Rgs16, Spsb1, Havcr2, Trgc2, Ctla2a, Nt5e, 2210408F21Rik
## Negative: Ncr1, Spry2, Plek, Arsb, Ripor2, mt-Nd4, Gm2682, Nr4a1, Dennd4a, Pitpnc1
## Itga2, Aoah, Dock8, Slc9a9, Celf2, Serpinb9, Fosb, Dock10, Elmo1, Atp8b4
## Itgax, Ccr5, Zeb2, Ifi203, Lpp, Nfkbiz, Vim, Itgb2, Gem, Actb
## PC_ 5
## Positive: Klf2, Fos, Jun, Cd3e, Cd3g, ENSMUSG00000121360, Klra4, Cmah, S1pr5, Cd3d
## Kif13b, Ncoa1, Wdr49, Zfp182, Tex9, Carns1, Tnik, Specc1, Rfx3, 9930111J21Rik2
## Prkch, Gm2682, Gpatch3, 4933406I18Rik, Zfp808, Hspa1b, Klra8, Kif16b, Ipcef1, Pvt1
## Negative: Vegfa, Fosl2, Dgat1, Serpinb9, Ifrd1, Pim1, Nfkbia, Nr4a3, Actg1, Nr4a1
## Kdm6b, P2ry10, Serpinb6b, Nfkbiz, Tnfrsf9, Crem, Mxd1, Irf8, Nudt4, Birc3
## Xcl1, Gpr132, Hilpda, Nfkbid, Icam1, Spry2, Cdkn1a, Cd52, Klrb1b, Car2
VizDimLoadings(s3_reduced, dims = 1:2, reduction = "pca")

DimPlot(s3_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s3_reduced, dims = 1:20, cells = 500, balanced = TRUE)

ElbowPlot(s3_reduced)

#Clustering
s3_neighboors <- FindNeighbors(object = s3_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s3_clusters <- FindClusters(object = s3_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 850
## Number of edges: 35278
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6440
## Number of communities: 5
## Elapsed time: 0 seconds
head(Idents(s3_clusters), 5) #cluster IDs 1st 5 cells
## 12486 86980 203100 218885 227873
## 0 2 3 2 0
## Levels: 0 1 2 3 4
#UMAP
s3_umap <- RunUMAP(object = s3_clusters, dims = 1:20)
## 00:29:37 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:29:37 Read 850 rows and found 20 numeric columns
## 00:29:37 Using Annoy for neighbor search, n_neighbors = 30
## 00:29:37 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:29:37 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1616db9cb09
## 00:29:37 Searching Annoy index using 1 thread, search_k = 3000
## 00:29:37 Annoy recall = 100%
## 00:29:38 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:29:38 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:29:38 Commencing optimization for 500 epochs, with 32470 positive edges
## 00:29:39 Optimization finished
DimPlot(s3_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s3 <- paramSweep(s3_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s3 <- summarizeSweep(sweep.res.list_s3, GT = FALSE)
bcmvn_s3 <- find.pK(sweep.stats_s3)

## NULL
ggplot(bcmvn_s3, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.25 max
pK <- bcmvn_s3[bcmvn_s3$BCmetric == max(bcmvn_s3$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.25
#Homotypic doublet proportion estimate
annotations <- s3_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s3_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 31
#DoubletFinder
s3_finalized <- doubletFinder(s3_umap,
PCs = 1:20,
pK = pK,
nExp = nExp_poi.adj,
reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 283 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s3_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.25_31")

table(s3_finalized@meta.data$DF.classifications_0.25_0.25_31)
##
## Doublet Singlet
## 31 819
s3_finalized <- subset(s3_finalized, subset = DF.classifications_0.25_0.25_31 == "Singlet")
table(s3_finalized@meta.data$DF.classifications_0.25_0.25_31)
##
## Singlet
## 819
DimPlot(s3_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.25_31")

Sample 4
#Quality control
seurat_s4[["percent.mt"]] <- PercentageFeatureSet(seurat_s4, pattern = "^mt-")
#View(seurat_s4@meta.data)
VlnPlot(seurat_s4, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s4, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s4, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s4, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

seurat_s4.filtered <- subset(seurat_s4, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)
FeatureScatter(seurat_s4.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s4.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s4.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

#Normalization
s4_normalized <- NormalizeData(object = seurat_s4.filtered)
## Normalizing layer: counts
#Integration
s4_integrated <- FindVariableFeatures(object = s4_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s4_integrated), 10)
top10
## [1] "Gm11290" "Ly6c2" "Ccl3" "Ccl4" "Klra8"
## [6] "Xcl1" "Il7r" "St6galnac3" "Trac" "Inpp4b"
plot1 <- VariableFeaturePlot(s4_integrated)
plot1

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2

#Scaling
s4_scaled <- ScaleData(object = s4_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s4_reduced <- RunPCA(object = s4_scaled)
## PC_ 1
## Positive: Gzma, Prf1, Ccl5, Irf8, Gzmb, Itgam, Eomes, Sell, Ccl3, ENSMUSG00000121360
## Cma1, Klf2, Klri2, ENSMUSG00000121359, Klra4, Zeb2, Actg1, Ccnd2, Ccl4, Klra8
## S1pr5, Emp3, Actb, Klrg1, Lgals1, Klra9, Itga2, Plek, Reep5, Klra7
## Negative: Il7r, Ly6e, Gpr183, Inpp4b, Cxcr6, Rora, Itga1, Camk4, Cd3g, Thy1
## Cd3e, Icos, Emb, Trac, Cd226, Cd3d, Cd28, Odc1, Gramd3, Ly6a
## Rbpj, Rhoh, Trgc4, Zfp36l1, Cd5, Trbc2, Tcf7, Cd8b1, Itgb3, Slc4a7
## PC_ 2
## Positive: Cd86, Slamf1, Adam19, Cd8b1, Ly6a, Cd8a, Themis, Pomt2, 4930469K13Rik, Fam169b
## Il1r2, Ifi27l2a, Cd5, Cbx8, Cep170b, Cd3e, Snhg17, Slfn1, Gm43329, Havcr2
## Ptprs, Pdcd1, Cxcr6, Trat1, Parp3, Invs, Dis3, Ephx1, Trac, Cd3g
## Negative: Tcf7, Cd7, Car2, Trgc4, Pcgf5, Zfp36l2, Epb41, Clcn3, Immp2l, Dscam
## Ern1, ENSMUSG00000120992, Kit, Mideas, Tmem176b, Xcl1, Scn1b, Mpp7, Cxcr4, Abca1
## Map3k14, Stat4, Raph1, Il7r, Klrb1b, Itgb3, Fam172a, Rftn1, Emb, Ttc14
## PC_ 3
## Positive: Bcl2l11, Myb, Slc26a11, Mau2, Cd27, Klhl9, Pmepa1, Tango6, Bend4, Ccr5
## Stoml1, Sell, Mxi1, Ube2d1, Cux1, Car2, Spock2, Cxcr4, Ifi27l2a, Vegfa
## Pgm3, Thada, Stat3, Nufip1, Cmah, Rps20, Fancl, Ptprs, Rttn, Pla2g6
## Negative: Gm36723, S100a6, Klrb1b, Klrg1, S100a4, Gpr34, Kansl1, Vim, Cep162, Rgs1
## Gmeb1, Tmem176b, Itga1, Zmym1, Sema6d, Ly6a2, Plek, Septin11, Rora, Bcl2a1d
## Capg, Cd226, Scn1b, Riok2, Nedd9, Aopep, H1f0, Immp2l, Nebl, Cd200r1
## PC_ 4
## Positive: ENSMUSG00000120570, Gm43462, Snx16, Cdc14a, Mblac2, Hook1, Osbpl8, Ripor3, Vti1a, Stx2
## Phldb2, Gm2682, Tex9, Trps1, Ddx60, Ube2t, Plcxd2, Sidt1, Aff3, Fry
## Rfx3, Zfp64, Ubash3b, Rpf2, 4932438A13Rik, Cd101, Fbxo31, Dnaaf9, Scml4, Rad17
## Negative: Irak2, Rab4a, Serpinb1a, Itgb3, Prrt1, Zfyve16, Prnp, Icos, Cd6, Ass1
## Zfp808, Fam120c, Ar, Gzmc, Cep162, Cd200r1, Cxcr6, Map2k4, AA467197, Atat1
## Dse, Osgin1, Pdcd1, Tmem87b, Senp7, Akr1b10, Arrdc4, Brd9, P2ry10, Mypop
## PC_ 5
## Positive: Slamf6, Ctla2a, Cd3d, Dpp4, Rps29, Irf2bp2, Trbc2, Gramd3, Rpl31, Ly6c2
## Nanp, Cep170, Camk4, Eif4a1, Amz2, Bcl11b, Trgc2, Klra1, Ddx60, Pign
## Rps20, Trgv2, Cd3e, H2-Ab1, Galnt12, Kbtbd11, Thy1, Rps26, Polb, Klhdc2
## Negative: Slc25a13, A430035B10Rik, Mast2, Exoc1, Uqcc1, Immp2l, Rgs9, 4930469K13Rik, ENSMUSG00000121069, Fbxo31
## Gm10033, Il1r2, Atf6, Atp8a2, Cbx8, Igf1r, Dbr1, Ugdh, Polr1e, Gm43329
## Cd86, Ttc27, Atrx, Ckap5, Tmem176b, Garin1a, Casp3, Tango6, Ccl25, Pomt2
VizDimLoadings(s4_reduced, dims = 1:2, reduction = "pca")

DimPlot(s4_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s4_reduced, dims = 1:20, cells = 500, balanced = TRUE)
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.
## Warning: Requested number is larger than the number of available items (128).
## Setting to 128.

ElbowPlot(s4_reduced)

#Clustering
s4_neighboors <- FindNeighbors(object = s4_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s4_clusters <- FindClusters(object = s4_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 128
## Number of edges: 6474
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.3126
## Number of communities: 2
## Elapsed time: 0 seconds
head(Idents(s4_clusters), 5) #cluster IDs 1st 5 cells
## 199026 301443 555700 677946 1052808
## 1 0 1 0 1
## Levels: 0 1
#UMAP
s4_umap <- RunUMAP(object = s4_clusters, dims = 1:20)
## 00:29:54 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:29:54 Read 128 rows and found 20 numeric columns
## 00:29:54 Using Annoy for neighbor search, n_neighbors = 30
## 00:29:54 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:29:54 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1612d3ef2b4
## 00:29:54 Searching Annoy index using 1 thread, search_k = 3000
## 00:29:54 Annoy recall = 100%
## 00:29:54 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:29:55 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:29:55 Commencing optimization for 500 epochs, with 4328 positive edges
## 00:29:55 Optimization finished
DimPlot(s4_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s4 <- paramSweep(s4_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s4 <- summarizeSweep(sweep.res.list_s4, GT = FALSE)
bcmvn_s4 <- find.pK(sweep.stats_s4)

## NULL
ggplot(bcmvn_s4, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.3 max
pK <- bcmvn_s4[bcmvn_s4$BCmetric == max(bcmvn_s4$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.3
#Homotypic doublet proportion estimate
annotations <- s4_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s4_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 3
#DoubletFinder
s4_finalized <- doubletFinder(s4_umap,
PCs = 1:20,
pK = pK,
nExp = nExp_poi.adj,
reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 43 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s4_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.3_3")

table(s4_finalized@meta.data$DF.classifications_0.25_0.3_3)
##
## Doublet Singlet
## 3 125
s4_finalized <- subset(s4_finalized, subset = DF.classifications_0.25_0.3_3 == "Singlet")
table(s4_finalized@meta.data$DF.classifications_0.25_0.3_3)
##
## Singlet
## 125
DimPlot(s4_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.3_3")

Sample 5 (left for later)
#Quality control
#seurat_s5[["percent.mt"]] <- PercentageFeatureSet(seurat_s5, pattern = "^mt-")
#View(seurat_s5@meta.data)
#VlnPlot(seurat_s5, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
#FeatureScatter(seurat_s5, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
#FeatureScatter(seurat_s5, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
#FeatureScatter(seurat_s5, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
#seurat_s5.filtered <- subset(seurat_s5, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10) #maybe lower percent.mt?
#FeatureScatter(seurat_s5.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
#FeatureScatter(seurat_s5.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
#FeatureScatter(seurat_s5.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
#VlnPlot(seurat_s5.filtered, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
#Normalization
#s5_normalized <- NormalizeData(object = seurat_s5.filtered)
#Integration
#s5_integrated <- FindVariableFeatures(object = s5_normalized)
#top10 <- head(VariableFeatures(s5_integrated), 10)
#top10
#plot1 <- VariableFeaturePlot(s5_integrated)
#plot1
#plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
#plot2
#Scaling
#s5_scaled <- ScaleData(object = s5_integrated)
#Linear dimensionality reduction
#s5_reduced <- RunPCA(object = s5_scaled, npcs = 10)
#VizDimLoadings(s5_reduced, dims = 1:2, reduction = "pca")
#DimPlot(s5_reduced, reduction = "pca") + NoLegend()
#DimHeatmap(s5_reduced, dims = 1:10, cells = 500, balanced = TRUE)
#ElbowPlot(s5_reduced)
#Clustering
#s5_neighboors <- FindNeighbors(object = s5_reduced, dims = 1:10)
#s5_clusters <- FindClusters(object = s5_neighboors)
#head(Idents(s5_clusters), 5) #cluster IDs 1st 5 cells
#UMAP
#s5_umap <- RunUMAP(object = s5_clusters, dims = 1:10, n.neighbors = 5)
#DimPlot(s5_umap, reduction = "umap")
#Doublet finder; UNAVAILABLE FOR THIS SAMPLE; RUN DOWNSTREAM ANALYSIS WITH AND WITHOUT S5 TO SEE THE EFFECT
#pK identification (no ground-truth)
#sweep.res.list_s5 <- paramSweep(s5_umap, PCs = 1:10, sct = FALSE) #very low cell count, gives error
#sweep.stats_s5 <- summarizeSweep(sweep.res.list_s5, GT = FALSE)
#bcmvn_s5 <- find.pK(sweep.stats_s5)
#ggplot(bcmvn_s5, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()
#0.3 max
#pK <- bcmvn_s5[bcmvn_s4$BCmetric == max(bcmvn_s5$BCmetric), "pK"]
#pK <- as.numeric(as.character(pK))
#pK
#Homotypic doublet proportion estimate
#annotations <- s5_umap@meta.data$seurat_clusters
#homotypic.prop <- modelHomotypic(annotations)
#nExp_poi <- round(0.05*nrow(s5_umap@meta.data)) ##in project description
#nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
#nExp_poi.adj #expected number of doublets
#DoubletFinder
#s5_finalized <- doubletFinder(s5_umap,
#PCs = 1:20,
#pK = pK,
#nExp = nExp_poi.adj,
#reuse.pANN = FALSE, sct = FALSE)
#DimPlot(s5_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.3_3")
#table(s5_finalized@meta.data$DF.classifications_0.25_0.3_3)
#s5_finalized <- subset(s5_finalized, subset = DF.classifications_0.25_0.3_3 == "Singlet")
#table(s5_finalized@meta.data$DF.classifications_0.25_0.3_3)
#DimPlot(s5_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.3_3")
Sample 6
#Quality control
seurat_s6[["percent.mt"]] <- PercentageFeatureSet(seurat_s6, pattern = "^mt-")
#View(seurat_s6@meta.data)
VlnPlot(seurat_s6, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s6, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s6, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s6, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

seurat_s6.filtered <- subset(seurat_s6, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)
FeatureScatter(seurat_s6.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s6.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s6.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

VlnPlot(seurat_s6.filtered, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

#Normalization
s6_normalized <- NormalizeData(object = seurat_s6.filtered)
## Normalizing layer: counts
#Integration
s6_integrated <- FindVariableFeatures(object = s6_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s6_integrated), 10)
top10
## [1] "Klra5" "Gm11290" "Il7r" "Ly6c2" "Fth1" "Cxcl10" "Hspa1b"
## [8] "Ccl9" "Plac8" "Trac"
plot1 <- VariableFeaturePlot(s6_integrated)
plot1
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2
## Warning in scale_x_log10(): log-10 transformation introduced infinite values.

#Scaling
s6_scaled <- ScaleData(object = s6_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s6_reduced <- RunPCA(object = s6_scaled)
## PC_ 1
## Positive: Emb, Il7r, Ly6e, Inpp4b, Gpr183, Cxcr6, Tmem176b, Rora, Ltb, Trgc4
## Itgb3, Tcf7, Tmem176a, Ly6a, Ikzf2, Itga1, Zfp36l1, Plac8, Lyst, Slco3a1
## Cd28, Ckb, Prnp, Xcl1, 2900026A02Rik, Cd7, Tnfsf10, Lilrb4b, Odc1, Icos
## Negative: Gzma, Ccl5, Gzmb, Prf1, Ccl4, Ccl3, Itgam, Lgals1, Irf8, Actb
## Cma1, Klrg1, S1pr5, ENSMUSG00000121359, ENSMUSG00000121360, Cdc20b, Plek, Itga2, Klra9, Ccnd2
## Klri2, Zeb2, Eomes, Klra4, Klra8, Rap1gap2, Serpinb9b, Klf2, H1f2, Actg1
## PC_ 2
## Positive: Mpeg1, Fcgr4, Lyz2, Cd40, Csf1r, Slc15a3, Ifitm3, Slc11a1, Ly86, Wfdc17
## Clec7a, Ms4a8a, Bst1, Mgst1, Marcks, Il1b, Cybb, Gm21188, Tnfaip2, Ifitm2
## Gda, Clec4e, Il1rn, Ms4a6d, Ccr1, Cd86, Klra2, Cd74, Ifi30, Alox5ap
## Negative: Tcf7, Cd7, Ssh2, Emb, Ctla2a, Smad7, Slc26a11, Cblb, Dennd4a, Xcl1
## Fosl2, Ipcef1, P2ry10, Cd28, Zbtb20, Mideas, Lyst, Clcn3, Itgb3, Irak2
## Ern1, Gramd3, Tcf12, Skap1, Il18r1, Arhgap15, Sidt1, Slco3a1, Jmy, Il7r
## PC_ 3
## Positive: Inpp4b, Cxcr6, Cd226, S100a6, Fgl2, Ly6c2, Actb, Hopx, Cd3g, Ly6a
## S100a4, Il7r, Cd3e, Trgc4, Lgals1, Gm36723, Ccnd2, Tmem176b, Itga1, Cd160
## Klrc1, Tmem176a, Gstp3, Cd3d, Il2ra, Trgc2, Klrg1, Jaml, Gpr183, Cx3cr1
## Negative: Eomes, Myb, Bcl2l11, Ccr2, Dhrs3, Tgfb1, Pmepa1, Stat3, Klra4, Cxcr4
## Ctla2a, Tpd52, Cux1, Nrarp, Vegfa, Plxnc1, Slc26a11, Rnf157, Sult2b1, Cemip2
## Ndrg1, Gstm1, ENSMUSG00000121360, Satb1, Car2, Tcf7, Ret, Klra8, Mideas, Smad7
## PC_ 4
## Positive: Cd3e, Cd3d, Cd3g, Nsg2, Trac, Trgc2, Trgc1, Trgv2, S1pr1, Bcl11b
## Rps20, E2f1, Tubb5, Klra6, Klra7, Tcf7, Fos, Klra1, Actn1, Efcab11
## Smco4, Pclaf, Rgs10, Themis, Cdca2, Rps26, Atad2, Clspn, Cd8a, Slc26a11
## Negative: Serpinb9, Pim1, Nfkbiz, Nr4a1, Irf8, Nabp1, Vegfa, Nr4a3, Icam1, Dgat1
## Tmem176a, Gem, Serpinb6b, Il12rb2, Rora, Tmem176b, Ifrd1, Lilrb4b, Klrb1b, Spry2
## Fosl2, Kdm6b, Prf1, Nfkbid, Dennd4a, Ccl4, Plek, Gadd45b, Crem, Il2ra
## PC_ 5
## Positive: Dusp2, Cd3e, Cd3d, Trgc1, Cxcr4, Smad7, S1pr1, ENSMUSG00000121069, Nsg2, Cd3g
## Il7r, Gramd3, Pmepa1, Trac, Trgc2, Jun, Tcf7, Ly6c2, Klf2, Bcl11b
## Thada, Zfp36l1, Cd69, Tnfaip8, C1qb, Ipcef1, Trgv2, Rgs1, Gpr155, Ifngas1
## Negative: E2f1, Mki67, Uhrf1, Pclaf, Tcf19, E2f2, Kif15, Clspn, Stmn1, Top2a
## Birc5, Ccna2, Ckap2, Rad51, Ncaph, Cdc6, Aspm, Cenpk, Tpx2, Rrm2
## Ncapg2, Ncapg, Bub1, Cdca2, Fbxo5, Bub1b, Spc24, Cenpe, Cdk1, Knl1
VizDimLoadings(s6_reduced, dims = 1:2, reduction = "pca")

DimPlot(s6_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s6_reduced, dims = 1:20, cells = 500, balanced = TRUE)

ElbowPlot(s6_reduced)

#Clustering
s6_neighboors <- FindNeighbors(object = s6_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s6_clusters <- FindClusters(object = s6_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 926
## Number of edges: 39594
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6567
## Number of communities: 6
## Elapsed time: 0 seconds
head(Idents(s6_clusters), 5) #cluster IDs 1st 5 cells
## 37256 92053 106222 161238 184004
## 0 0 0 4 2
## Levels: 0 1 2 3 4 5
#UMAP
s6_umap <- RunUMAP(object = s6_clusters, dims = 1:20)
## 00:30:06 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:30:06 Read 926 rows and found 20 numeric columns
## 00:30:06 Using Annoy for neighbor search, n_neighbors = 30
## 00:30:06 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:30:06 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1615604ef4e
## 00:30:06 Searching Annoy index using 1 thread, search_k = 3000
## 00:30:06 Annoy recall = 100%
## 00:30:06 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:30:07 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:30:07 Commencing optimization for 500 epochs, with 35802 positive edges
## 00:30:08 Optimization finished
DimPlot(s6_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s6 <- paramSweep(s6_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s6 <- summarizeSweep(sweep.res.list_s6, GT = FALSE)
bcmvn_s6 <- find.pK(sweep.stats_s6)

## NULL
ggplot(bcmvn_s6, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.2
pK <- bcmvn_s6[bcmvn_s6$BCmetric == max(bcmvn_s6$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.2
#Homotypic doublet proportion estimate
annotations <- s6_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s6_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 37
#DoubletFinder
s6_finalized <- doubletFinder(s6_umap,
PCs = 1:20,
pK = pK,
nExp = nExp_poi.adj,
reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 309 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s6_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.2_37")

table(s6_finalized@meta.data$DF.classifications_0.25_0.2_37)
##
## Doublet Singlet
## 37 889
s6_finalized <- subset(s6_finalized, subset = DF.classifications_0.25_0.2_37 == "Singlet")
table(s6_finalized@meta.data$DF.classifications_0.25_0.2_37)
##
## Singlet
## 889
DimPlot(s6_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.2_37")

Sample 7
#Quality control
seurat_s7[["percent.mt"]] <- PercentageFeatureSet(seurat_s7, pattern = "^mt-")
#View(seurat_s7@meta.data)
VlnPlot(seurat_s7, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s7, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s7, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s7, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

seurat_s7.filtered <- subset(seurat_s7, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)
FeatureScatter(seurat_s7.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s7.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s7.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

VlnPlot(seurat_s7.filtered, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

#Normalization
s7_normalized <- NormalizeData(object = seurat_s7.filtered)
## Normalizing layer: counts
#Integration
s7_integrated <- FindVariableFeatures(object = s7_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s7_integrated), 10)
top10
## [1] "Gzma" "Gzmb" "Klra8"
## [4] "Gm11290" "Ccl4" "Klra4"
## [7] "Ccl3" "ENSMUSG00000121360" "Ly6c2"
## [10] "Ccl5"
plot1 <- VariableFeaturePlot(s7_integrated)
plot1

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2

#Scaling
s7_scaled <- ScaleData(object = s7_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s7_reduced <- RunPCA(object = s7_scaled)
## PC_ 1
## Positive: Rora, Ly6e, Ly6a, Inpp4b, Cxcr6, Il7r, Cd226, Tmem176b, Odc1, Gpr183
## Furin, Itgb3, Tmem176a, Emb, Hilpda, Dgat1, Lilrb4b, Gm36723, Il2ra, Trgc4
## ENSMUSG00000121239, AA467197, Hif1a, S100a4, Rbpj, Ckb, Camk4, Icos, AU020206, Scn1b
## Negative: Gzma, Ccl5, Eomes, Sell, Klra8, ENSMUSG00000121359, Itgam, Prf1, Klra4, Ccl3
## ENSMUSG00000121360, Klra9, Gzmb, Ccl4, Itga2, Klra7, Klf2, Tyrobp, Lgals1, Serpinb9b
## Irf8, Gm4956, Gstm1, Zeb2, Mcam, Itga4, Dusp2, Klra3, Actb, Cdc20b
## PC_ 2
## Positive: Klrb1b, Car2, Ncr1, Tyrobp, Irf8, AW112010, Klrb1f, Serpinb9, Vegfa, Bhlhe40
## Plek, Itgax, Sema4c, Lilrb4b, Prf1, Nabp1, Spry2, Tmem176b, Ccr2, Gm36723
## Serpina3g, Klri2, Dgat1, Scn1b, Arrdc4, Tmem176a, Nudt4, Gzma, Klrb1a, Maff
## Negative: Cd3e, Cd3d, Trac, Cd3g, Trbc2, Cd5, Bcl11b, Trgc1, Trgc2, Ly6c2
## Actn1, Dpp4, Slfn1, Gramd3, Dtx4, Ifi27l2a, Ass1, Il21r, Cd6, Peli1
## Pitpnm2, Gm8369, Lat, Cd40lg, Thada, Trgv2, Klra1, Cd8a, Klra6, Itk
## PC_ 3
## Positive: Bach2, Gm2682, Foxp1, Atp8b4, Celf2, Dennd4a, Utrn, Aff3, Mgat5, Kdm2b
## Gm38190, Vps37b, Btbd11, Spry2, Ern1, Angpt1, Pde1c, Arhgap15, Ulk2, Tcf12
## Fryl, Zeb1, Trps1, Sell, Satb1, Stag1, Arhgap26, Arhgap10, Dym, Hook2
## Negative: Tmsb4x, Cxcr6, Actg1, Ly6a, Cd8b1, Actb, Themis, S100a6, AA467197, Id2
## Cd8a, Lcp1, Gzmk, Lgals1, Gm8369, AW112010, Fam169b, St8sia1, Capg, Fgl2
## Furin, Crip1, Adam8, Nol6, Eif5a, Pdcd1, S100a4, Adam19, Gapdh, Ifitm1
## PC_ 4
## Positive: Gzmb, Tnfrsf9, Tnfsf11, Tgfb1, Ybx3, St8sia1, Rbpj, Themis, Egr2, Mif
## Chst2, Irf4, Nhlrc2, Art2b, Snhg4, Cd8b1, Zeb2, Hk2, Pdcd1, Cd8a
## Nefh, Atp13a3, Sit1, Entpd1, Lgals1, Trerf1, Zfp61, Rnf157, Ralgps2, Gm2449
## Negative: Il7r, Cd7, Dusp1, Jun, Trgc4, Ly6e, Fos, S1pr1, Lyst, Zfp36
## Gpr183, Fosb, Zfp36l1, Lmo4, Zfp36l2, Klrc1, Sidt1, Tmem176b, Slfn5, Trgc1
## Prnp, Dmrta1, Tmem176a, Rflnb, Txnip, Klf3, Senp7, Trgv1, Cxcr4, Ifi213
## PC_ 5
## Positive: Gm12185, Actb, Txnip, Vav3, 9930111J21Rik2, Id2, Epsti1, Styk1, Coro2a, Rab40c
## Rgs12, S100a4, Gm36723, Gm36445, Rab2b, Tmsb4x, Gpr34, Cd226, Mfsd8, Gab3
## Slc38a9, Irgm2, Pex2, Akap17b, Aak1, AW112010, Ly6e, Luc7l3, Xrcc5, S100a6
## Negative: Kdm6b, Nr4a1, Pim1, Nr4a3, Tnfaip3, Nfkbia, Nfkbid, Csrnp1, Fosl2, Dusp5
## Vps37b, Nfkbiz, Maff, Bhlhe40, Vegfa, Crem, Rel, Ifrd1, Gadd45b, Zc3h12a
## Dusp10, Nup98, Gpr132, Odc1, Cdkn1a, Nr4a2, Sik1, Gm12536, Mxd1, 1700008J07Rik
VizDimLoadings(s7_reduced, dims = 1:2, reduction = "pca")

DimPlot(s7_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s7_reduced, dims = 1:20, cells = 500, balanced = TRUE)
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.
## Warning: Requested number is larger than the number of available items (268).
## Setting to 268.

ElbowPlot(s7_reduced)

#Clustering
s7_neighboors <- FindNeighbors(object = s7_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s7_clusters <- FindClusters(object = s7_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 268
## Number of edges: 11874
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.5993
## Number of communities: 3
## Elapsed time: 0 seconds
head(Idents(s7_clusters), 5) #cluster IDs 1st 5 cells
## 407927 519089 829786 1238414 1285777
## 1 0 2 0 2
## Levels: 0 1 2
#UMAP
s7_umap <- RunUMAP(object = s7_clusters, dims = 1:20)
## 00:30:24 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:30:24 Read 268 rows and found 20 numeric columns
## 00:30:24 Using Annoy for neighbor search, n_neighbors = 30
## 00:30:24 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:30:24 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1614cb863cf
## 00:30:24 Searching Annoy index using 1 thread, search_k = 3000
## 00:30:24 Annoy recall = 100%
## 00:30:24 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:30:25 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:30:25 Commencing optimization for 500 epochs, with 9456 positive edges
## 00:30:25 Optimization finished
DimPlot(s7_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s7 <- paramSweep(s7_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s7 <- summarizeSweep(sweep.res.list_s7, GT = FALSE)
bcmvn_s7 <- find.pK(sweep.stats_s7)

## NULL
ggplot(bcmvn_s7, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.16
pK <- bcmvn_s7[bcmvn_s7$BCmetric == max(bcmvn_s7$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.16
#Homotypic doublet proportion estimate
annotations <- s7_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s7_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 9
#DoubletFinder
s7_finalized <- doubletFinder(s7_umap,
PCs = 1:20,
pK = pK,
nExp = nExp_poi.adj,
reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 89 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s7_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.16_9")

table(s7_finalized@meta.data$DF.classifications_0.25_0.16_9)
##
## Doublet Singlet
## 9 259
s7_finalized <- subset(s7_finalized, subset = DF.classifications_0.25_0.16_9 == "Singlet")
table(s7_finalized@meta.data$DF.classifications_0.25_0.16_9)
##
## Singlet
## 259
DimPlot(s7_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.16_9")

Sample 8
#Quality control
seurat_s8[["percent.mt"]] <- PercentageFeatureSet(seurat_s8, pattern = "^mt-")
#View(seurat_s8@meta.data)
VlnPlot(seurat_s8, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

FeatureScatter(seurat_s8, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s8, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s8, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

seurat_s8.filtered <- subset(seurat_s8, subset = nFeature_RNA >= 200 & nFeature_RNA <= 4000 & percent.mt < 10)
FeatureScatter(seurat_s8.filtered, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s8.filtered, feature1 = "nCount_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

FeatureScatter(seurat_s8.filtered, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'

VlnPlot(seurat_s8.filtered, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.

#Normalization
s8_normalized <- NormalizeData(object = seurat_s8.filtered)
## Normalizing layer: counts
#VFs
s8_integrated <- FindVariableFeatures(object = s8_normalized)
## Finding variable features for layer counts
top10 <- head(VariableFeatures(s8_integrated), 10)
top10
## [1] "Gm11290" "Cxcl10" "Ifitm1" "Ccl3" "Ly6c2" "Trac" "Ccl4"
## [8] "Gzmc" "Plac8" "Gzmb"
plot1 <- VariableFeaturePlot(s8_integrated)
plot1

plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) #labeled(extra swag)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2

#Scaling
s8_scaled <- ScaleData(object = s8_integrated)
## Centering and scaling data matrix
#Linear dimensionality reduction
s8_reduced <- RunPCA(object = s8_scaled)
## PC_ 1
## Positive: Ly6e, Il7r, Ly6a, Emb, Cxcr6, Rora, Ifitm2, Gpr183, Lilrb4b, Slfn1
## Plac8, Inpp4b, Ltb, Tmem176b, Jaml, Itgb3, Camk4, Cd226, Trgc4, Rbpj
## Ikzf2, Ifitm3, Csf3r, Mpeg1, Ptafr, Cd3e, Gm21188, Ifi27l2a, Lilrb4a, Ctss
## Negative: Gzma, Prf1, Ccl5, Gzmb, Irf8, Itgam, Cma1, Eomes, Ccl4, Ccl3
## ENSMUSG00000121359, Lgals1, Klri2, Klra8, Klf2, ENSMUSG00000121360, Sell, Klra9, Klrg1, Serpinb9b
## Cdc20b, Klra4, S1pr5, Plek, Actb, Flna, Serpinb9, Dusp2, Arsb, Klra3
## PC_ 2
## Positive: Il1b, Pla2g7, Csf3r, Mpeg1, Ptafr, F10, Gm21188, Ly6i, Cybb, Clec4a1
## Pld4, Ube2l6, Il6ra, Tlr7, Clec4a3, Tbc1d9, Fcgr4, Spi1, Lyz2, Sirpa
## Gk, Csf2rb, Marcks, Pirb, Lgmn, ENSMUSG00000120878, Apoe, Ly86, Cd300c2, Cd68
## Negative: Il7r, Emb, Tcf7, Inpp4b, Cd7, Gpr183, Cd226, Itga1, Ltb, Rora
## Trgc4, Thy1, Lyst, Ikzf2, Cxcr6, Camk4, Trbc2, Cd3e, Xcl1, Cd3d
## Tmem176a, Ly6e, Tnfsf10, Gramd3, Cd3g, Icos, Itgb3, Cd160, Tmem176b, Zfp36l1
## PC_ 3
## Positive: Vegfa, Bcl2l11, Dhrs3, Fosl2, Sft2d2, Eomes, Myb, Ccr2, Stat3, Satb1
## Car2, Gpr132, Rnf157, Irak2, Tpd52, Cxcr4, Irf8, Tgfb1, Ctla2a, Skil
## Dennd4a, Emb, Slc26a11, Sell, Irs2, Bach2, Malt1, Dgat1, Cemip2, Ern1
## Negative: Cd3e, Ly6c2, Cd3g, Cd3d, Actb, S100a6, Cd8b1, Cd8a, Trac, Adam19
## Trbc2, Pdcd1, Cx3cr1, Trgc1, Jaml, S100a4, Trgc2, Cxcr6, Cd226, Ccnd2
## Cd5, Slfn1, Fgl2, Ifit1bl1, Trgv2, Bcl11b, Cd160, Dpp4, Trat1, Vim
## PC_ 4
## Positive: Tmem176b, Tmem176a, Ckb, Adgrg3, Klrb1b, Ccdc184, Asb2, Gm36723, Dscam, Trgc4
## Scn1b, Sema6d, Fgl2, Krt83, Adam8, Psd2, 2900026A02Rik, Arrdc4, Lilrb4a, Serpinb1a
## Furin, H1f0, Gzmc, Prnp, Klrc1, Cd226, Adgrg5, Cd160, Inpp4b, Cep290
## Negative: Cd3e, Oas3, Ifit1bl1, Ifit3, Cd3d, Gm8369, Ifit1, Slfn1, Trac, Dpp4
## Cmpk2, Cd8a, Mx1, Ifit3b, Ifi27l2a, Rtp4, Cd8b1, A330040F15Rik, Cd6, Cd3g
## Mx2, Slfn5, Phf11c, Ifi208, Ifi213, Ifi206, Ifit2, S1pr1, Ifi209, Cxcl10
## PC_ 5
## Positive: Tcf7, Ctla2a, Osgin1, Pmepa1, Jun, Slc26a11, Fos, Fth1, Cd7, Psap
## Tspan13, Cited4, Gpr155, Isg20, Car2, Xcl1, Ftl1, Filip1l, Klra4, Mfsd6
## 1700017B05Rik, Rgs10, Camk2n1, Spsb1, Ccr2, Eya2, Trdc, Ly6e, Ighm, Rgs16
## Negative: Serpinb9, Nr4a1, Mki67, Lgals1, Gzmb, Il12rb2, Sgk1, Kif11, Pclaf, Flna
## Birc5, Asf1b, Pola1, Plek, Nfkbia, Cdca3, Top2a, Ckap2l, Actb, Nfkbiz
## Prf1, Bard1, Ccl3, Stmn1, Anln, Brip1, Cx3cr1, Spc24, Magt1, Ccna2
VizDimLoadings(s8_reduced, dims = 1:2, reduction = "pca")

DimPlot(s8_reduced, reduction = "pca") + NoLegend()

DimHeatmap(s8_reduced, dims = 1:20, cells = 500, balanced = TRUE)

ElbowPlot(s8_reduced)

#Clustering
s8_neighboors <- FindNeighbors(object = s8_reduced, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
s8_clusters <- FindClusters(object = s8_neighboors)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 640
## Number of edges: 26292
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6579
## Number of communities: 5
## Elapsed time: 0 seconds
head(Idents(s8_clusters), 5) #cluster IDs 1st 5 cells
## 53570 215374 353164 525991 531003
## 3 0 1 2 3
## Levels: 0 1 2 3 4
#UMAP
s8_umap <- RunUMAP(object = s8_clusters, dims = 1:20)
## 00:30:37 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:30:37 Read 640 rows and found 20 numeric columns
## 00:30:37 Using Annoy for neighbor search, n_neighbors = 30
## 00:30:37 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:30:37 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1611e4d108b
## 00:30:37 Searching Annoy index using 1 thread, search_k = 3000
## 00:30:37 Annoy recall = 100%
## 00:30:37 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:30:38 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:30:38 Commencing optimization for 500 epochs, with 24348 positive edges
## 00:30:38 Optimization finished
DimPlot(s8_umap, reduction = "umap")

#Doublet finder
#pK identification (no ground-truth)
sweep.res.list_s8 <- paramSweep(s8_umap, PCs = 1:20, sct = FALSE)
## [1] "Creating artificial doublets for pN = 5%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 10%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 15%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 20%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 25%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
## [1] "Creating artificial doublets for pN = 30%"
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Defining neighborhoods..."
## [1] "Computing pANN across all pK..."
## [1] "pK = 0.02..."
## [1] "pK = 0.03..."
## [1] "pK = 0.04..."
## [1] "pK = 0.05..."
## [1] "pK = 0.06..."
## [1] "pK = 0.07..."
## [1] "pK = 0.08..."
## [1] "pK = 0.09..."
## [1] "pK = 0.1..."
## [1] "pK = 0.11..."
## [1] "pK = 0.12..."
## [1] "pK = 0.13..."
## [1] "pK = 0.14..."
## [1] "pK = 0.15..."
## [1] "pK = 0.16..."
## [1] "pK = 0.17..."
## [1] "pK = 0.18..."
## [1] "pK = 0.19..."
## [1] "pK = 0.2..."
## [1] "pK = 0.21..."
## [1] "pK = 0.22..."
## [1] "pK = 0.23..."
## [1] "pK = 0.24..."
## [1] "pK = 0.25..."
## [1] "pK = 0.26..."
## [1] "pK = 0.27..."
## [1] "pK = 0.28..."
## [1] "pK = 0.29..."
## [1] "pK = 0.3..."
sweep.stats_s8 <- summarizeSweep(sweep.res.list_s8, GT = FALSE)
bcmvn_s8 <- find.pK(sweep.stats_s8)

## NULL
ggplot(bcmvn_s8, aes(pK, BCmetric, group = 1)) + geom_point() + geom_line()

#0.15
pK <- bcmvn_s8[bcmvn_s8$BCmetric == max(bcmvn_s8$BCmetric), "pK"]
pK <- as.numeric(as.character(pK))
pK
## [1] 0.15
#Homotypic doublet proportion estimate
annotations <- s8_umap@meta.data$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(0.05*nrow(s8_umap@meta.data)) ##in project description
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
nExp_poi.adj #expected number of doublets
## [1] 24
#DoubletFinder
s8_finalized <- doubletFinder(s8_umap,
PCs = 1:20,
pK = pK,
nExp = nExp_poi.adj,
reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 213 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## Normalizing layer: counts
## [1] "Finding variable genes..."
## Finding variable features for layer counts
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
DimPlot(s8_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.15_24")

table(s8_finalized@meta.data$DF.classifications_0.25_0.15_24)
##
## Doublet Singlet
## 24 616
s8_finalized <- subset(s8_finalized, subset = DF.classifications_0.25_0.15_24 == "Singlet")
table(s8_finalized@meta.data$DF.classifications_0.25_0.15_24)
##
## Singlet
## 616
DimPlot(s8_finalized, reduction = 'umap', group.by = "DF.classifications_0.25_0.15_24")

Proceeding with the steps in the Seurat documentation on
integration.
merged_object <- NormalizeData(merged_object)
## Normalizing layer: counts.Sample_1
## Normalizing layer: counts.Sample_2
## Normalizing layer: counts.Sample_3
## Normalizing layer: counts.Sample_4
## Normalizing layer: counts.Sample_6
## Normalizing layer: counts.Sample_7
## Normalizing layer: counts.Sample_8
merged_object <- FindVariableFeatures(merged_object)
## Finding variable features for layer counts.Sample_1
## Finding variable features for layer counts.Sample_2
## Finding variable features for layer counts.Sample_3
## Finding variable features for layer counts.Sample_4
## Finding variable features for layer counts.Sample_6
## Finding variable features for layer counts.Sample_7
## Finding variable features for layer counts.Sample_8
merged_object <- ScaleData(merged_object)
## Centering and scaling data matrix
merged_object <- RunPCA(merged_object)
## PC_ 1
## Positive: Emb, Il7r, Ly6e, Gpr183, Tcf7, Inpp4b, Cxcr6, Rora, Trgc4, Ltb
## Ikzf2, Itgb3, Camk4, Cd3e, Cd28, Lyst, Gramd3, Ly6a, Thy1, Cd3g
## Itga1, Tmem176b, Zfp36l1, Cd7, Gpr132, Cd3d, Plac8, Lilrb4b, Tmem176a, Cxcr3
## Negative: Gzma, Prf1, Gzmb, Ccl5, Ccl3, Ccl4, Irf8, Itgam, Cma1, Lgals1
## Actb, Plek, Itgb2, Klrg1, S1pr5, ENSMUSG00000121359, Emp3, Klra9, Flna, Cdc20b
## ENSMUSG00000121360, Zeb2, Ncr1, Itga2, Sgk1, Eomes, Serpinb9b, Klra8, Ccnd2, Klri2
## PC_ 2
## Positive: S100a6, Actb, Ly6c2, Cd52, Cd226, Cxcr6, S100a4, Fgl2, Ly6a, Itgb2
## mt-Nd4, Inpp4b, Hopx, Trgc4, Il7r, Ncr1, Cd3e, Cd3g, Ccnd2, Cd160
## Tmem176b, Lgals1, Cd3d, Tmem176a, Gm36723, Vim, Slamf7, Jaml, Adam8, Klrc1
## Negative: Eomes, Dhrs3, Myb, Bcl2l11, Pmepa1, Stat3, Tgfb1, Cemip2, Ccr2, Ctla2a
## Klra4, Slc26a11, Plxnc1, Vegfa, Tspan13, Rnf157, Sft2d2, Car2, Cxcr4, Satb1
## Klra8, Tpd52, ENSMUSG00000121360, Skil, Slc3a2, Gstm1, Sell, Smad7, Sult2b1, Bach2
## PC_ 3
## Positive: Serpinb9, Bhlhe40, Vegfa, Nr4a1, Pim1, Irf8, Nfkbiz, Nfkbia, Vps37b, Dusp5
## Nr4a3, Nabp1, Ifrd1, Kdm6b, Fosl2, Dgat1, Spry2, Serpinb6b, Icam1, Nfkbid
## Klrb1b, Tmem176b, Tmem176a, Mxd1, Lilrb4b, Birc3, Tnfrsf9, Plek, Crem, Gem
## Negative: Cd3e, Cd3g, Cd3d, Trgc2, Trac, Trgc1, Trgv2, Fos, Dpp4, S1pr1
## Klra6, Slfn1, Gm8369, Actn1, Klf2, Cd8a, Bcl11b, Cd5, Jun, Plcg1
## Cd6, Klra1, Trbc2, Ifi208, Hdgfl3, Rgs10, Peli1, Themis, Tcf7, Pmepa1
## PC_ 4
## Positive: Fth1, Pmepa1, Vegfa, AA467197, Plac8, Tspan13, Aldoa, Osgin1, Dhrs3, Cdkn1a
## Dgat1, Rbpj, Scn1b, Prdx6, 1700017B05Rik, Spsb1, Odc1, Rgs16, Adam8, Nrarp
## Ly6a, Ctla2a, Cish, Tgfb1, Prnp, Itgb3, Actg1, Lgals3, Cemip2, Smox
## Negative: Gm2682, Ripor2, Add3, Fos, Arhgap15, Dock10, Pik3r1, Peak1, Rap1gap2, Prkcq
## Elmo1, Pitpnc1, Utrn, Dock11, Plek, Itga4, Fosb, Tnik, Atp8b4, Arsb
## Ncr1, Scml4, Ccnd3, Dennd4a, Itga2, Atp2b1, Cblb, Epsti1, Klf6, Gm26740
## PC_ 5
## Positive: Cd3e, Trac, Cd3d, Cd3g, Kdm6b, Nr4a3, Nr4a1, Gramd3, Pim1, Fosl2
## Vps37b, Nr4a2, Nfkbia, Trgc2, Nfkbid, Ifrd1, Nfkbiz, Cd5, Crem, Dpp4
## Trbc2, Dusp5, Trgc1, Trgv2, Slfn1, Cd6, Actn1, Gna13, Icam1, Ccl5
## Negative: Tmem176b, Tmem176a, Klrb1b, Ckb, Gm36723, Cited4, Jun, Scn1b, Dscam, Klrb1f
## Acpp, Asb2, Adgrg3, Id2, Ncr1, Car2, Gpr34, Prnp, Arrdc4, Sema6d
## ENSMUSG00000121239, Gas7, 2900026A02Rik, Fos, 9930111J21Rik2, Vegfc, AU020206, Itgax, Adam8, Krt83
merged_object <- FindNeighbors(merged_object, dims = 1:20, reduction = "pca")
## Computing nearest neighbor graph
## Computing SNN
merged_object <- FindClusters(merged_object, resolution = 2, cluster.name = "unintegrated_clusters")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5183
## Number of edges: 176050
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6386
## Number of communities: 18
## Elapsed time: 0 seconds
merged_object <- RunUMAP(merged_object, dims = 1:20, reduction = "pca", reduction.name = "umap.unintegrated")
## 00:31:04 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:31:04 Read 5183 rows and found 20 numeric columns
## 00:31:04 Using Annoy for neighbor search, n_neighbors = 30
## 00:31:04 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:31:04 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec16120b57951
## 00:31:04 Searching Annoy index using 1 thread, search_k = 3000
## 00:31:05 Annoy recall = 100%
## 00:31:06 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:31:06 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:31:06 Commencing optimization for 500 epochs, with 212392 positive edges
## 00:31:09 Optimization finished
DimPlot(merged_object, reduction ="umap.unintegrated", group.by = c("orig.ident", "seurat_clusters"))

#Integration
merged_object <- IntegrateLayers(object = merged_object, method = CCAIntegration, orig.reduction = "pca", new.reduction = "integrated.cca",
verbose = FALSE)
merged_object <- JoinLayers(merged_object)
head(merged_object)
## orig.ident nCount_RNA nFeature_RNA percent.mt RNA_snn_res.0.8
## s1_14310 Sample_1 5267 2239 3.284602 1
## s1_38613 Sample_1 7072 2749 2.969457 0
## s1_44195 Sample_1 4280 1954 2.663551 0
## s1_93453 Sample_1 4943 1877 4.733967 3
## s1_157761 Sample_1 4008 1729 6.087824 0
## s1_214830 Sample_1 7224 2644 4.332780 3
## s1_230825 Sample_1 5366 2217 4.714871 3
## s1_276993 Sample_1 5506 2442 2.760625 5
## s1_300567 Sample_1 3749 1862 3.974393 2
## s1_376984 Sample_1 5816 2263 5.140990 0
## seurat_clusters pANN_0.25_0.29_67 DF.classifications_0.25_0.29_67
## s1_14310 0 0.2087227 Singlet
## s1_38613 11 0.3255452 Singlet
## s1_44195 1 0.2040498 Singlet
## s1_93453 8 0.1869159 Singlet
## s1_157761 1 0.2056075 Singlet
## s1_214830 9 0.1666667 Singlet
## s1_230825 9 0.2196262 Singlet
## s1_276993 6 0.3146417 Singlet
## s1_300567 3 0.1121495 Singlet
## s1_376984 1 0.2274143 Singlet
## pANN_0.25_0.11_34 DF.classifications_0.25_0.11_34 pANN_0.25_0.25_31
## s1_14310 NA <NA> NA
## s1_38613 NA <NA> NA
## s1_44195 NA <NA> NA
## s1_93453 NA <NA> NA
## s1_157761 NA <NA> NA
## s1_214830 NA <NA> NA
## s1_230825 NA <NA> NA
## s1_276993 NA <NA> NA
## s1_300567 NA <NA> NA
## s1_376984 NA <NA> NA
## DF.classifications_0.25_0.25_31 pANN_0.25_0.3_3
## s1_14310 <NA> NA
## s1_38613 <NA> NA
## s1_44195 <NA> NA
## s1_93453 <NA> NA
## s1_157761 <NA> NA
## s1_214830 <NA> NA
## s1_230825 <NA> NA
## s1_276993 <NA> NA
## s1_300567 <NA> NA
## s1_376984 <NA> NA
## DF.classifications_0.25_0.3_3 pANN_0.25_0.2_37
## s1_14310 <NA> NA
## s1_38613 <NA> NA
## s1_44195 <NA> NA
## s1_93453 <NA> NA
## s1_157761 <NA> NA
## s1_214830 <NA> NA
## s1_230825 <NA> NA
## s1_276993 <NA> NA
## s1_300567 <NA> NA
## s1_376984 <NA> NA
## DF.classifications_0.25_0.2_37 pANN_0.25_0.16_9
## s1_14310 <NA> NA
## s1_38613 <NA> NA
## s1_44195 <NA> NA
## s1_93453 <NA> NA
## s1_157761 <NA> NA
## s1_214830 <NA> NA
## s1_230825 <NA> NA
## s1_276993 <NA> NA
## s1_300567 <NA> NA
## s1_376984 <NA> NA
## DF.classifications_0.25_0.16_9 pANN_0.25_0.15_24
## s1_14310 <NA> NA
## s1_38613 <NA> NA
## s1_44195 <NA> NA
## s1_93453 <NA> NA
## s1_157761 <NA> NA
## s1_214830 <NA> NA
## s1_230825 <NA> NA
## s1_276993 <NA> NA
## s1_300567 <NA> NA
## s1_376984 <NA> NA
## DF.classifications_0.25_0.15_24 unintegrated_clusters
## s1_14310 <NA> 0
## s1_38613 <NA> 11
## s1_44195 <NA> 1
## s1_93453 <NA> 8
## s1_157761 <NA> 1
## s1_214830 <NA> 9
## s1_230825 <NA> 9
## s1_276993 <NA> 6
## s1_300567 <NA> 3
## s1_376984 <NA> 1
tail(merged_object)
## orig.ident nCount_RNA nFeature_RNA percent.mt RNA_snn_res.0.8
## s8_55985941 Sample_8 3697 1681 7.817149 0
## s8_56094760 Sample_8 4949 2041 4.485755 3
## s8_56193215 Sample_8 2666 1376 1.312828 4
## s8_56197150 Sample_8 2780 1503 4.820144 3
## s8_56216117 Sample_8 4579 2018 3.734440 3
## s8_56294783 Sample_8 4866 2143 5.219893 0
## s8_56317073 Sample_8 2623 1308 5.871140 0
## s8_56326298 Sample_8 3291 1551 5.408690 1
## s8_56483446 Sample_8 3930 1913 4.249364 0
## s8_56571947 Sample_8 3873 1821 4.441002 0
## seurat_clusters pANN_0.25_0.29_67 DF.classifications_0.25_0.29_67
## s8_55985941 3 NA <NA>
## s8_56094760 6 NA <NA>
## s8_56193215 5 NA <NA>
## s8_56197150 6 NA <NA>
## s8_56216117 6 NA <NA>
## s8_56294783 2 NA <NA>
## s8_56317073 2 NA <NA>
## s8_56326298 4 NA <NA>
## s8_56483446 3 NA <NA>
## s8_56571947 3 NA <NA>
## pANN_0.25_0.11_34 DF.classifications_0.25_0.11_34 pANN_0.25_0.25_31
## s8_55985941 NA <NA> NA
## s8_56094760 NA <NA> NA
## s8_56193215 NA <NA> NA
## s8_56197150 NA <NA> NA
## s8_56216117 NA <NA> NA
## s8_56294783 NA <NA> NA
## s8_56317073 NA <NA> NA
## s8_56326298 NA <NA> NA
## s8_56483446 NA <NA> NA
## s8_56571947 NA <NA> NA
## DF.classifications_0.25_0.25_31 pANN_0.25_0.3_3
## s8_55985941 <NA> NA
## s8_56094760 <NA> NA
## s8_56193215 <NA> NA
## s8_56197150 <NA> NA
## s8_56216117 <NA> NA
## s8_56294783 <NA> NA
## s8_56317073 <NA> NA
## s8_56326298 <NA> NA
## s8_56483446 <NA> NA
## s8_56571947 <NA> NA
## DF.classifications_0.25_0.3_3 pANN_0.25_0.2_37
## s8_55985941 <NA> NA
## s8_56094760 <NA> NA
## s8_56193215 <NA> NA
## s8_56197150 <NA> NA
## s8_56216117 <NA> NA
## s8_56294783 <NA> NA
## s8_56317073 <NA> NA
## s8_56326298 <NA> NA
## s8_56483446 <NA> NA
## s8_56571947 <NA> NA
## DF.classifications_0.25_0.2_37 pANN_0.25_0.16_9
## s8_55985941 <NA> NA
## s8_56094760 <NA> NA
## s8_56193215 <NA> NA
## s8_56197150 <NA> NA
## s8_56216117 <NA> NA
## s8_56294783 <NA> NA
## s8_56317073 <NA> NA
## s8_56326298 <NA> NA
## s8_56483446 <NA> NA
## s8_56571947 <NA> NA
## DF.classifications_0.25_0.16_9 pANN_0.25_0.15_24
## s8_55985941 <NA> 0.0468750
## s8_56094760 <NA> 0.1796875
## s8_56193215 <NA> 0.1328125
## s8_56197150 <NA> 0.1640625
## s8_56216117 <NA> 0.2265625
## s8_56294783 <NA> 0.0937500
## s8_56317073 <NA> 0.0468750
## s8_56326298 <NA> 0.0468750
## s8_56483446 <NA> 0.1406250
## s8_56571947 <NA> 0.0468750
## DF.classifications_0.25_0.15_24 unintegrated_clusters
## s8_55985941 Singlet 3
## s8_56094760 Singlet 6
## s8_56193215 Singlet 5
## s8_56197150 Singlet 6
## s8_56216117 Singlet 6
## s8_56294783 Singlet 2
## s8_56317073 Singlet 2
## s8_56326298 Singlet 4
## s8_56483446 Singlet 3
## s8_56571947 Singlet 3
merged_object <- FindNeighbors(merged_object, reduction = "integrated.cca", dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
merged_object <- FindClusters(merged_object, resolution = 0.5, cluster.name = "integrated_clusters") #keep lower; increase based on cond
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5183
## Number of edges: 222642
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8181
## Number of communities: 10
## Elapsed time: 0 seconds
merged_object <- RunUMAP(merged_object, dims = 1:20, reduction = "integrated.cca")
## 00:31:56 UMAP embedding parameters a = 0.9922 b = 1.112
## 00:31:56 Read 5183 rows and found 20 numeric columns
## 00:31:56 Using Annoy for neighbor search, n_neighbors = 30
## 00:31:56 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 00:31:56 Writing NN index file to temp file /var/folders/gg/3wtfy73j3rl40n0qjrdl80bc0000gn/T//RtmpWpAefw/filec1616a2fc59d
## 00:31:56 Searching Annoy index using 1 thread, search_k = 3000
## 00:31:57 Annoy recall = 100%
## 00:31:57 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 00:31:58 Initializing from normalized Laplacian + noise (using RSpectra)
## 00:31:58 Commencing optimization for 500 epochs, with 218726 positive edges
## 00:32:01 Optimization finished
DimPlot(merged_object, reduction = "umap", group.by = c("orig.ident", "seurat_clusters")) #should be c("orig.ident","seurat_annotations")

DimPlot(merged_object, reduction = "umap", split.by = "orig.ident")

Conserved markers by clusters
Cluster 0
markers_c0 <- FindConservedMarkers(merged_object, ident.1 = 0, grouping.var = "orig.ident")
## Testing group Sample_4: (0) vs (2, 1, 3, 4, 5, 7)
## Testing group Sample_7: (0) vs (4, 3, 5, 2, 1, 7, 6)
## Testing group Sample_8: (0) vs (4, 7, 5, 2, 1, 9, 3, 6, 8)
## Testing group Sample_3: (0) vs (5, 1, 2, 4, 3, 7, 6, 8, 9)
## Testing group Sample_2: (0) vs (1, 3, 2, 5, 4, 6, 9, 7, 8)
## Testing group Sample_6: (0) vs (4, 1, 5, 2, 7, 3, 8, 6, 9)
## Testing group Sample_1: (0) vs (1, 2, 4, 3, 6, 5, 7, 8, 9)
head(markers_c0)
## Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Gzma 1.215047e-12 1.6655519 1.000 0.747
## Gzmb 5.411346e-12 2.3091115 1.000 0.526
## Tmsb4x 1.745308e-11 0.9833812 1.000 1.000
## Prf1 2.605209e-14 2.3996941 1.000 0.674
## Actb 6.355907e-11 1.1465537 1.000 1.000
## Emb 8.824234e-07 -3.9337882 0.133 0.653
## Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Gzma 2.151362e-08 1.027687e-11 2.7644970 0.962
## Gzmb 9.581330e-08 1.822884e-15 3.8989895 0.923
## Tmsb4x 3.090242e-07 1.425675e-03 0.6393013 1.000
## Prf1 4.612783e-10 3.557915e-11 2.6507811 0.962
## Actb 1.125377e-06 7.407543e-08 1.1707625 1.000
## Emb 1.562419e-02 1.081180e-08 -2.8772127 0.346
## Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Gzma 0.635 1.819622e-07 7.639380e-57 1.7817571
## Gzmb 0.318 3.227599e-11 1.856060e-53 2.6930772
## Tmsb4x 1.000 1.000000e+00 3.685610e-38 0.8943607
## Prf1 0.511 6.299643e-07 3.744258e-48 2.0622162
## Actb 1.000 1.311580e-03 6.413922e-41 1.0543377
## Emb 0.841 1.914337e-04 4.428778e-34 -3.7753774
## Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Gzma 1.000 0.784 1.352629e-52 6.591215e-61
## Gzmb 1.000 0.532 3.286340e-49 8.861015e-63
## Tmsb4x 1.000 1.000 6.525742e-34 5.382758e-58
## Prf1 0.974 0.673 6.629584e-44 1.138935e-56
## Actb 1.000 0.998 1.135649e-36 4.943648e-54
## Emb 0.188 0.729 7.841594e-30 6.967848e-43
## Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Gzma 1.308551 0.995 0.867 1.167040e-56
## Gzmb 2.620633 0.977 0.597 1.568931e-58
## Tmsb4x 1.026097 1.000 1.000 9.530711e-54
## Prf1 1.798592 0.991 0.681 2.016599e-52
## Actb 1.023439 1.000 1.000 8.753223e-50
## Emb -3.939754 0.156 0.679 1.233727e-38
## Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Gzma 5.195224e-65 1.2633827 1.000 0.885
## Gzmb 1.257317e-59 2.3616030 0.966 0.612
## Tmsb4x 2.013274e-64 0.9987883 1.000 1.000
## Prf1 4.482487e-59 1.7510673 0.971 0.672
## Actb 8.633681e-59 1.0514522 1.000 0.997
## Emb 1.659946e-42 -3.5009636 0.172 0.675
## Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Gzma 9.198663e-61 1.222551e-68 1.3095673 1.000
## Gzmb 2.226205e-55 1.301391e-76 2.3452182 0.992
## Tmsb4x 3.564704e-60 2.738814e-69 0.8986653 1.000
## Prf1 7.936692e-55 5.136174e-68 1.7117577 0.992
## Actb 1.528680e-54 6.939773e-58 0.9783775 1.000
## Emb 2.939100e-38 2.492018e-47 -3.6481565 0.131
## Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Gzma 0.865 2.164650e-64 8.252536e-119 1.3028958
## Gzmb 0.590 2.304244e-72 7.981280e-117 2.4144417
## Tmsb4x 1.000 4.849343e-65 4.468515e-107 0.8454760
## Prf1 0.739 9.094110e-64 4.594595e-106 1.6442724
## Actb 1.000 1.228756e-53 9.625332e-99 0.9857227
## Emb 0.667 4.412367e-43 4.121046e-83 -3.3484939
## Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj max_pval
## Gzma 1.000 0.897 1.461194e-114 1.027687e-11
## Gzmb 0.963 0.650 1.413166e-112 5.411346e-12
## Tmsb4x 1.000 1.000 7.911952e-103 1.425675e-03
## Prf1 0.987 0.751 8.135189e-102 3.557915e-11
## Actb 1.000 1.000 1.704261e-94 7.407543e-08
## Emb 0.152 0.682 7.296725e-79 8.824234e-07
## minimump_p_val
## Gzma 5.776776e-118
## Gzmb 5.586896e-116
## Tmsb4x 3.127960e-106
## Prf1 3.216216e-105
## Actb 6.737733e-98
## Emb 2.884732e-82
FeaturePlot(merged_object, features = c("Gzma", "Gzmb", "Tmsb4x", "Prf1", "Actb", "Emb"), min.cutoff = 'q10', reduction = "umap")

Cluster 1
markers_c1 <- FindConservedMarkers(merged_object, ident.1 = 1, grouping.var = "orig.ident")
## Testing group Sample_4: (1) vs (2, 3, 0, 4, 5, 7)
## Testing group Sample_7: (1) vs (4, 3, 5, 2, 0, 7, 6)
## Testing group Sample_8: (1) vs (4, 0, 7, 5, 2, 9, 3, 6, 8)
## Testing group Sample_3: (1) vs (5, 2, 4, 3, 0, 7, 6, 8, 9)
## Testing group Sample_2: (1) vs (3, 2, 5, 0, 4, 6, 9, 7, 8)
## Testing group Sample_6: (1) vs (0, 4, 5, 2, 7, 3, 8, 6, 9)
## Testing group Sample_1: (1) vs (2, 0, 4, 3, 6, 5, 7, 8, 9)
head(markers_c1)
## Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Ccr2 1.477598e-06 1.4508060 0.829 0.500
## Ccl5 5.228492e-01 -0.5297467 1.000 0.929
## Gzmb 1.241403e-01 -1.1658451 0.634 0.643
## Cd7 1.154982e-05 1.0281320 0.927 0.488
## Rap1b 2.415908e-02 -0.5749798 0.976 0.857
## Ctla2a 4.300499e-04 0.8509587 0.732 0.488
## Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Ccr2 0.02616235 1.196308e-08 1.2062496 0.877
## Ccl5 1.00000000 9.755418e-01 -0.4906165 0.892
## Gzmb 1.00000000 7.180655e-01 -1.1044222 0.369
## Cd7 0.20450120 2.706668e-02 0.2405162 0.908
## Rap1b 1.00000000 1.172166e-02 -0.4312940 0.846
## Ctla2a 1.00000000 2.046767e-01 0.2818947 0.723
## Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Ccr2 0.567 0.0002118184 2.415408e-21 1.3066011
## Ccl5 0.840 1.0000000000 5.815502e-08 -0.9817531
## Gzmb 0.381 1.0000000000 7.207646e-08 -1.8596237
## Cd7 0.758 1.0000000000 2.160264e-09 0.5157704
## Rap1b 0.902 1.0000000000 8.438946e-05 -0.5819957
## Ctla2a 0.716 1.0000000000 4.366782e-11 0.7641169
## Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Ccr2 0.885 0.560 4.276721e-17 4.161815e-26
## Ccl5 0.980 0.927 1.029693e-03 2.822960e-13
## Gzmb 0.547 0.682 1.276186e-03 1.194314e-07
## Cd7 0.899 0.652 3.824963e-05 4.549868e-14
## Rap1b 0.838 0.889 1.000000e+00 2.766807e-05
## Ctla2a 0.784 0.489 7.731824e-07 3.742153e-10
## Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Ccr2 1.2609879 0.842 0.525 7.368910e-22
## Ccl5 -0.7440240 0.986 0.980 4.998333e-09
## Gzmb -1.4818090 0.637 0.720 2.114652e-03
## Cd7 0.5950440 0.833 0.558 8.055997e-10
## Rap1b -0.4902986 0.842 0.887 4.898909e-01
## Ctla2a 0.7045460 0.726 0.510 6.625857e-06
## Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Ccr2 2.016472e-33 1.2684343 0.894 0.542
## Ccl5 8.951389e-15 -0.7258744 0.992 0.969
## Gzmb 1.177891e-08 -1.4104502 0.661 0.724
## Cd7 3.952732e-21 0.9196994 0.852 0.533
## Rap1b 4.920275e-06 -0.5265137 0.822 0.885
## Ctla2a 3.683701e-12 0.9311525 0.691 0.444
## Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Ccr2 3.570365e-29 6.660988e-45 1.4913377 0.932
## Ccl5 1.584933e-10 3.047792e-11 -0.7074490 0.988
## Gzmb 2.085574e-04 1.528163e-10 -1.4262061 0.651
## Cd7 6.998708e-17 5.082192e-15 0.7004850 0.835
## Rap1b 8.711839e-02 1.709772e-12 -0.7099189 0.851
## Ctla2a 6.522361e-08 1.107877e-20 1.1238461 0.723
## Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Ccr2 0.536 1.179395e-40 1.691825e-54 1.1744811
## Ccl5 0.964 5.396421e-07 4.382549e-33 -0.8984798
## Gzmb 0.725 2.705766e-06 2.146793e-25 -1.9151981
## Cd7 0.617 8.998530e-11 2.698521e-25 0.6564855
## Rap1b 0.914 3.027322e-08 9.486466e-21 -0.7140731
## Ctla2a 0.422 1.961606e-16 1.165175e-13 0.5815499
## Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj max_pval
## Ccr2 0.930 0.579 2.995545e-50 1.477598e-06
## Ccl5 0.987 0.983 7.759740e-29 9.755418e-01
## Gzmb 0.642 0.769 3.801112e-21 7.180655e-01
## Cd7 0.863 0.612 4.778001e-21 2.706668e-02
## Rap1b 0.784 0.897 1.679674e-16 2.415908e-02
## Ctla2a 0.725 0.515 2.063058e-09 2.046767e-01
## minimump_p_val
## Ccr2 1.184277e-53
## Ccl5 3.067784e-32
## Gzmb 1.502755e-24
## Cd7 1.888964e-24
## Rap1b 6.640526e-20
## Ctla2a 7.755136e-20
FeaturePlot(merged_object, features = c("Ccr2", "Ccl5", "Gzmb", "Cd7", "Rap1b", "Ctla2a"), min.cutoff = 'q10', reduction = "umap")

Cluster 2
markers_c2 <- FindConservedMarkers(merged_object, ident.1 = 2, grouping.var = "orig.ident")
## Testing group Sample_4: (2) vs (1, 3, 0, 4, 5, 7)
## Testing group Sample_7: (2) vs (4, 3, 5, 1, 0, 7, 6)
## Testing group Sample_8: (2) vs (4, 0, 7, 5, 1, 9, 3, 6, 8)
## Testing group Sample_3: (2) vs (5, 1, 4, 3, 0, 7, 6, 8, 9)
## Testing group Sample_2: (2) vs (1, 3, 5, 0, 4, 6, 9, 7, 8)
## Testing group Sample_6: (2) vs (0, 4, 1, 5, 7, 3, 8, 6, 9)
## Testing group Sample_1: (2) vs (1, 0, 4, 3, 6, 5, 7, 8, 9)
head(markers_c2)
## Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Tmsb4x 2.126575e-03 -0.9393720 1.000 1.000
## Pmepa1 8.497064e-05 2.3574565 0.727 0.246
## Cfl1 1.283277e-01 -0.4920707 1.000 1.000
## Actb 6.877714e-04 -0.9738109 1.000 1.000
## Ncr1 6.609110e-03 -1.9666518 0.273 0.719
## Pfn1 1.882067e-04 -1.4659598 0.818 0.982
## Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Tmsb4x 1 0.006305328 -0.5618892 1.000
## Pmepa1 1 0.001317577 1.5985097 0.706
## Cfl1 1 0.005751684 -0.5873940 1.000
## Actb 1 0.009030797 -0.5576387 1.000
## Ncr1 1 0.113479711 -0.8403411 0.529
## Pfn1 1 0.127088695 -0.3465550 0.941
## Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Tmsb4x 1.000 1 8.965747e-23 -0.9892678
## Pmepa1 0.306 1 1.728834e-19 1.9303840
## Cfl1 0.992 1 7.279888e-18 -0.9227658
## Actb 1.000 1 1.125718e-15 -0.8355359
## Ncr1 0.674 1 4.114623e-14 -1.7499795
## Pfn1 0.975 1 1.508448e-10 -0.7139207
## Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Tmsb4x 1.000 1.000 1.587475e-18 1.528841e-32
## Pmepa1 0.723 0.310 3.061074e-15 2.465588e-39
## Cfl1 0.957 1.000 1.288977e-13 3.698814e-31
## Actb 0.989 1.000 1.993196e-11 1.020776e-31
## Ncr1 0.404 0.759 7.285351e-10 6.065192e-32
## Pfn1 0.915 0.973 2.670857e-06 1.286989e-24
## Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Tmsb4x -0.9714757 1.000 1.000 2.706966e-28
## Pmepa1 1.9931870 0.816 0.330 4.365570e-35
## Cfl1 -1.0118830 0.966 0.994 6.549121e-27
## Actb -0.9954313 1.000 1.000 1.807386e-27
## Ncr1 -2.7161662 0.230 0.702 1.073903e-27
## Pfn1 -0.9622337 0.856 0.974 2.278743e-20
## Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Tmsb4x 4.674011e-42 -1.044405 1.000 1.000
## Pmepa1 8.950016e-24 1.771852 0.646 0.310
## Cfl1 1.572640e-34 -1.000624 0.979 0.991
## Actb 5.979804e-40 -1.073383 0.990 1.000
## Ncr1 4.307079e-29 -2.244020 0.302 0.710
## Pfn1 3.396034e-32 -1.120258 0.854 0.968
## Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Tmsb4x 8.275803e-38 1.146018e-43 -1.1447200 1.000
## Pmepa1 1.584690e-19 8.081303e-47 2.4725710 0.813
## Cfl1 2.784516e-30 7.232115e-28 -0.9485548 0.986
## Actb 1.058784e-35 3.263250e-24 -0.8642225 1.000
## Ncr1 7.626115e-25 8.888288e-31 -2.2948923 0.309
## Pfn1 6.013017e-28 2.531783e-29 -1.1544189 0.856
## Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Tmsb4x 1.000 2.029139e-39 3.938290e-66 -0.9270128
## Pmepa1 0.255 1.430876e-42 2.028102e-62 2.0005571
## Cfl1 0.999 1.280518e-23 2.731259e-56 -0.9229065
## Actb 1.000 5.777911e-20 2.043755e-52 -0.8740888
## Ncr1 0.796 1.573760e-26 1.382370e-48 -1.9954968
## Pfn1 0.965 4.482774e-25 3.494870e-44 -0.9219084
## Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj max_pval
## Tmsb4x 1.000 1.000 6.973136e-62 0.006305328
## Pmepa1 0.729 0.267 3.590958e-58 0.001317577
## Cfl1 0.990 0.998 4.835968e-52 0.128327695
## Actb 1.000 1.000 3.618673e-48 0.009030797
## Ncr1 0.386 0.767 2.447624e-44 0.113479711
## Pfn1 0.901 0.975 6.188017e-40 0.127088695
## minimump_p_val
## Tmsb4x 2.756803e-65
## Pmepa1 1.419672e-61
## Cfl1 1.911882e-55
## Actb 1.430629e-51
## Ncr1 9.676590e-48
## Pfn1 2.446409e-43
FeaturePlot(merged_object, features = c("Tmsb4x", "Pmepa1", "Cfl1", "Actb", "Ncr1", "Pfn1"), min.cutoff = 'q10', reduction = "umap")

Cluster 3
markers_c3 <- FindConservedMarkers(merged_object, ident.1 = 3, grouping.var = "orig.ident")
## Testing group Sample_4: (3) vs (2, 1, 0, 4, 5, 7)
## Testing group Sample_7: (3) vs (4, 5, 2, 1, 0, 7, 6)
## Testing group Sample_8: (3) vs (4, 0, 7, 5, 2, 1, 9, 6, 8)
## Testing group Sample_3: (3) vs (5, 1, 2, 4, 0, 7, 6, 8, 9)
## Testing group Sample_2: (3) vs (1, 2, 5, 0, 4, 6, 9, 7, 8)
## Testing group Sample_6: (3) vs (0, 4, 1, 5, 2, 7, 8, 6, 9)
## Testing group Sample_1: (3) vs (1, 2, 0, 4, 6, 5, 7, 8, 9)
head(markers_c3)
## Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Nr4a1 1.075620e-03 2.791555 1.000 0.661
## Pim1 6.061426e-03 1.168955 1.000 0.907
## Nr4a3 6.704097e-03 1.626052 1.000 0.534
## Nfkbiz 4.364376e-02 1.699575 0.857 0.568
## Nfkbid 7.821487e-05 3.211632 0.857 0.297
## Nfkbia 2.269208e-03 1.877935 1.000 0.754
## Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Nr4a1 1 0.0121817370 1.3701563 0.9
## Pim1 1 0.0003269975 1.2556559 1.0
## Nr4a3 1 0.0518880072 0.9726873 0.7
## Nfkbiz 1 0.0057749661 0.9913053 0.9
## Nfkbid 1 0.0190860168 1.4801381 0.7
## Nfkbia 1 0.0046363657 1.0017431 1.0
## Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Nr4a1 0.639 1 3.672879e-26 2.435278
## Pim1 0.956 1 1.131362e-14 1.229889
## Nr4a3 0.458 1 2.989974e-10 1.448916
## Nfkbiz 0.671 1 9.308188e-11 1.383970
## Nfkbid 0.418 1 6.377328e-19 2.148724
## Nfkbia 0.843 1 1.401214e-12 1.179727
## Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Nr4a1 1.000 0.641 6.503199e-22 4.764986e-31
## Pim1 1.000 0.927 2.003190e-10 5.732263e-20
## Nr4a3 0.815 0.454 5.294047e-06 4.295224e-15
## Nfkbiz 0.907 0.633 1.648108e-06 1.560632e-18
## Nfkbid 0.852 0.349 1.129170e-14 7.920557e-14
## Nfkbia 1.000 0.854 2.480990e-08 6.257187e-17
## Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Nr4a1 2.260595 0.987 0.626 8.436884e-27
## Pim1 1.151868 1.000 0.942 1.014954e-15
## Nr4a3 1.677667 0.855 0.475 7.605124e-11
## Nfkbiz 1.455207 0.961 0.668 2.763256e-14
## Nfkbid 1.642791 0.816 0.394 1.402414e-09
## Nfkbia 1.154027 1.000 0.843 1.107897e-12
## Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Nr4a1 1.838418e-34 2.3155490 0.976 0.595
## Pim1 3.136641e-16 0.8945974 1.000 0.951
## Nr4a3 1.403351e-13 1.1978058 0.833 0.465
## Nfkbiz 1.658155e-13 1.1750470 0.905 0.632
## Nfkbid 1.189584e-12 1.5745423 0.726 0.372
## Nfkbia 2.059349e-21 1.1855633 1.000 0.838
## Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Nr4a1 3.255103e-30 9.956319e-35 2.1426219 0.989
## Pim1 5.553737e-12 1.115536e-26 1.1996934 1.000
## Nr4a3 2.484773e-09 3.410772e-22 1.8185824 0.860
## Nfkbiz 2.935929e-09 3.901547e-20 1.3653676 0.957
## Nfkbid 2.106277e-08 6.063482e-25 1.8654458 0.892
## Nfkbia 3.646283e-17 1.019137e-18 0.9790313 0.989
## Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Nr4a1 0.656 1.762866e-30 1.158663e-83 2.1325037
## Pim1 0.946 1.975167e-22 2.276500e-55 1.1295109
## Nr4a3 0.443 6.039113e-18 2.045120e-47 1.5872597
## Nfkbiz 0.709 6.908079e-16 7.640821e-47 1.3556321
## Nfkbid 0.474 1.073600e-20 9.876995e-43 1.6775330
## Nfkbia 0.882 1.804483e-14 1.861125e-39 0.9540879
## Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj max_pval
## Nr4a1 0.995 0.677 2.051528e-79 0.012181737
## Pim1 1.000 0.964 4.030771e-51 0.006061426
## Nr4a3 0.894 0.460 3.621089e-43 0.051888007
## Nfkbiz 0.959 0.698 1.352884e-42 0.043643757
## Nfkbid 0.862 0.503 1.748821e-38 0.019086017
## Nfkbia 1.000 0.873 3.295307e-35 0.004636366
## minimump_p_val
## Nr4a1 8.110638e-83
## Pim1 1.593550e-54
## Nr4a3 1.431584e-46
## Nfkbiz 5.348575e-46
## Nfkbid 6.913896e-42
## Nfkbia 1.302787e-38
FeaturePlot(merged_object, features = c("Nr4a1", "Pim1", "Nr4a3", "Nfkbiz", "Nfkbid", "Nfkbia"), min.cutoff = 'q10', reduction = "umap")

Cluster 4
markers_c4 <- FindConservedMarkers(merged_object, ident.1 = 4, grouping.var = "orig.ident")
## Testing group Sample_4: (4) vs (2, 1, 3, 0, 5, 7)
## Testing group Sample_7: (4) vs (3, 5, 2, 1, 0, 7, 6)
## Testing group Sample_8: (4) vs (0, 7, 5, 2, 1, 9, 3, 6, 8)
## Testing group Sample_3: (4) vs (5, 1, 2, 3, 0, 7, 6, 8, 9)
## Testing group Sample_2: (4) vs (1, 3, 2, 5, 0, 6, 9, 7, 8)
## Testing group Sample_6: (4) vs (0, 1, 5, 2, 7, 3, 8, 6, 9)
## Testing group Sample_1: (4) vs (1, 2, 0, 3, 6, 5, 7, 8, 9)
head(markers_c4)
## Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Tmem176b 2.497853e-13 7.200362 0.524 0.010
## Tmem176a 5.959621e-12 6.246949 0.476 0.010
## Cxcr6 4.221679e-05 1.457118 0.429 0.077
## Il7r 9.169272e-12 3.162340 0.857 0.173
## Ckb 5.450622e-05 3.871379 0.238 0.019
## Krt83 1.070233e-03 2.922802 0.190 0.019
## Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Tmem176b 4.422698e-09 3.420587e-27 4.0630651 0.675
## Tmem176a 1.055211e-07 1.538395e-21 3.2830784 0.584
## Cxcr6 7.474906e-01 3.956218e-15 1.6618579 0.779
## Il7r 1.623511e-07 1.542046e-07 0.8929757 0.844
## Ckb 9.650871e-01 4.314771e-14 3.6656390 0.390
## Krt83 1.000000e+00 2.563678e-14 5.7550200 0.312
## Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Tmem176b 0.055 6.056491e-23 7.316469e-80 6.759692
## Tmem176a 0.049 2.723883e-17 8.403114e-64 6.864799
## Cxcr6 0.236 7.004879e-11 6.906206e-40 3.522176
## Il7r 0.440 2.730346e-03 1.580684e-35 3.058017
## Ckb 0.033 7.639734e-10 2.262701e-40 5.147719
## Krt83 0.005 4.539249e-10 1.796440e-31 6.181299
## Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Tmem176b 0.704 0.015 1.295454e-75 1.339436e-80
## Tmem176a 0.568 0.011 1.487855e-59 5.186672e-68
## Cxcr6 0.593 0.064 1.222813e-35 2.201368e-34
## Il7r 0.716 0.135 2.798759e-31 2.233786e-38
## Ckb 0.420 0.017 4.006338e-36 2.499386e-24
## Krt83 0.296 0.007 3.180776e-27 4.419033e-26
## Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Tmem176b 6.669095 0.554 0.008 2.371605e-76
## Tmem176a 6.141527 0.482 0.008 9.183522e-64
## Cxcr6 4.151166 0.393 0.025 3.897742e-30
## Il7r 3.354353 0.661 0.083 3.955142e-34
## Ckb 5.246590 0.214 0.008 4.425412e-20
## Krt83 5.842426 0.179 0.003 7.824339e-22
## Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Tmem176b 2.309161e-60 5.413552 0.476 0.016
## Tmem176a 1.170699e-74 8.452846 0.429 0.002
## Cxcr6 1.278999e-59 4.611491 0.492 0.018
## Il7r 3.577088e-48 3.811770 0.667 0.071
## Ckb 1.043803e-42 5.097074 0.317 0.009
## Krt83 1.334703e-37 6.214594 0.254 0.005
## Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Tmem176b 4.088600e-56 1.606858e-93 6.826222 0.585
## Tmem176a 2.072839e-70 2.791078e-89 7.686168 0.512
## Cxcr6 2.264595e-55 2.069874e-61 4.440349 0.561
## Il7r 6.333593e-44 3.923625e-65 3.023299 0.829
## Ckb 1.848158e-38 1.228709e-57 5.000168 0.439
## Krt83 2.363225e-33 2.538458e-43 6.252155 0.268
## Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Tmem176b 0.010 2.845103e-89 6.103439e-124 5.659137
## Tmem176a 0.004 4.941883e-85 3.481817e-107 5.801093
## Cxcr6 0.035 3.664919e-57 1.255234e-94 4.741620
## Il7r 0.095 6.947171e-61 8.459578e-86 3.694017
## Ckb 0.016 2.175552e-53 1.175041e-81 5.588630
## Krt83 0.004 4.494594e-39 2.149519e-80 6.794259
## Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj max_pval
## Tmem176b 0.557 0.015 1.080675e-119 2.497853e-13
## Tmem176a 0.456 0.010 6.164904e-103 5.959621e-12
## Cxcr6 0.519 0.021 2.222516e-90 4.221679e-05
## Il7r 0.785 0.079 1.497853e-81 1.542046e-07
## Ckb 0.367 0.009 2.080528e-77 5.450622e-05
## Krt83 0.291 0.003 3.805938e-76 1.070233e-03
## minimump_p_val
## Tmem176b 4.272407e-123
## Tmem176a 2.437272e-106
## Cxcr6 8.786635e-94
## Il7r 5.921705e-85
## Ckb 8.225290e-81
## Krt83 1.504663e-79
FeaturePlot(merged_object, features = c("Tmem176b", "Tmem176a", "Cxcr6", "Il7r", "Ckb", "Krt83"), min.cutoff = 'q10', reduction = "umap")

Cluster 5
markers_c5 <- FindConservedMarkers(merged_object, ident.1 = 5, grouping.var = "orig.ident")
## Testing group Sample_4: (5) vs (2, 1, 3, 0, 4, 7)
## Testing group Sample_7: (5) vs (4, 3, 2, 1, 0, 7, 6)
## Testing group Sample_8: (5) vs (4, 0, 7, 2, 1, 9, 3, 6, 8)
## Testing group Sample_3: (5) vs (1, 2, 4, 3, 0, 7, 6, 8, 9)
## Testing group Sample_2: (5) vs (1, 3, 2, 0, 4, 6, 9, 7, 8)
## Testing group Sample_6: (5) vs (0, 4, 1, 2, 7, 3, 8, 6, 9)
## Testing group Sample_1: (5) vs (1, 2, 0, 4, 3, 6, 7, 8, 9)
head(markers_c5)
## Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Cd3e 3.548072e-15 3.190758 0.9 0.043
## Cd3g 2.133633e-12 2.743428 0.9 0.070
## Trgc2 2.027462e-05 2.580021 0.3 0.017
## Cd3d 2.075574e-15 3.846109 1.0 0.070
## Trgc1 1.247819e-05 5.272598 0.3 0.017
## Dpp4 7.393485e-12 7.194596 0.4 0.000
## Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Cd3e 6.282216e-11 7.781409e-41 4.141041 0.929
## Cd3g 3.777811e-08 8.361552e-30 2.809674 0.875
## Trgc2 3.589824e-01 1.552416e-21 8.458705 0.411
## Cd3d 3.675012e-11 1.227549e-40 4.167588 0.911
## Trgc1 2.209388e-01 2.273059e-22 6.895311 0.464
## Dpp4 1.309091e-07 6.067777e-09 4.448364 0.250
## Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Cd3e 0.049 1.377776e-36 9.727149e-110 5.352490
## Cd3g 0.103 1.480496e-25 3.136613e-69 4.986241
## Trgc2 0.000 2.748708e-17 9.517830e-43 5.693060
## Cd3d 0.049 2.173498e-36 3.757123e-76 4.870504
## Trgc1 0.010 4.024678e-18 7.664631e-52 7.235920
## Dpp4 0.025 1.074361e-04 6.037741e-22 3.212364
## Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Cd3e 0.966 0.016 1.722289e-105 9.169223e-133
## Cd3g 0.780 0.038 5.553687e-65 2.200579e-77
## Trgc2 0.441 0.014 1.685227e-38 2.503943e-52
## Cd3d 0.864 0.047 6.652363e-72 1.555333e-91
## Trgc1 0.475 0.009 1.357100e-47 5.059710e-50
## Dpp4 0.339 0.029 1.069042e-17 3.516940e-20
## Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Cd3e 5.861757 0.912 0.014 1.623503e-128
## Cd3g 5.167211 0.702 0.029 3.896346e-73
## Trgc2 10.748960 0.298 0.000 4.433482e-48
## Cd3d 5.760607 0.737 0.022 2.753872e-87
## Trgc1 7.703775 0.333 0.004 8.958722e-46
## Dpp4 5.340003 0.175 0.007 6.227094e-16
## Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Cd3e 7.238403e-139 6.297918 0.918 0.014
## Cd3g 2.767216e-80 4.740166 0.673 0.022
## Trgc2 1.327865e-55 7.616575 0.347 0.004
## Cd3d 2.569800e-73 5.095550 0.714 0.035
## Trgc1 3.605875e-44 5.727967 0.327 0.007
## Dpp4 6.178949e-36 6.329157 0.265 0.006
## Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Cd3e 1.281632e-134 9.692243e-146 6.688313 0.92
## Cd3g 4.899632e-76 1.491914e-85 4.363994 0.78
## Trgc2 2.351118e-51 7.361676e-59 6.342774 0.44
## Cd3d 4.550088e-69 1.109206e-84 5.003268 0.76
## Trgc1 6.384562e-40 1.313098e-58 6.089648 0.44
## Dpp4 1.094045e-31 2.332838e-28 4.740599 0.26
## Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Cd3e 0.012 1.716109e-141 6.817531e-289 6.549859
## Cd3g 0.032 2.641582e-81 3.129003e-224 5.937174
## Trgc2 0.011 1.303458e-54 2.973040e-146 7.038254
## Cd3d 0.031 1.963960e-80 1.434006e-145 4.977010
## Trgc1 0.011 2.324972e-54 7.599189e-117 6.997875
## Dpp4 0.011 4.130523e-24 1.561138e-94 5.420075
## Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj max_pval
## Cd3e 0.981 0.011 1.207112e-284 3.548072e-15
## Cd3g 0.864 0.019 5.540213e-220 2.133633e-12
## Trgc2 0.505 0.005 5.264065e-142 2.027462e-05
## Cd3d 0.738 0.035 2.539051e-141 2.075574e-15
## Trgc1 0.437 0.007 1.345512e-112 1.247819e-05
## Dpp4 0.379 0.008 2.764151e-90 6.067777e-09
## minimump_p_val
## Cd3e 4.772271e-288
## Cd3g 2.190302e-223
## Trgc2 2.081128e-145
## Cd3d 1.003804e-144
## Trgc1 5.319432e-116
## Dpp4 1.092797e-93
FeaturePlot(merged_object, features = c("Cd3e", "Cd3g", "Trgc2", "Cd3d", "Trgc1", "Dpp4"), min.cutoff = 'q10', reduction = "umap")

Cluster 6
markers_c6 <- FindConservedMarkers(merged_object, ident.1 = 6, grouping.var = "orig.ident")
## Warning: Identity: 6 not present in group Sample_4. Skipping Sample_4
## Warning: Sample_7 has fewer than 3 cells in Identity: 6. Skipping Sample_7
## Testing group Sample_8: (6) vs (4, 0, 7, 5, 2, 1, 9, 3, 8)
## Testing group Sample_3: (6) vs (5, 1, 2, 4, 3, 0, 7, 8, 9)
## Testing group Sample_2: (6) vs (1, 3, 2, 5, 0, 4, 9, 7, 8)
## Testing group Sample_6: (6) vs (0, 4, 1, 5, 2, 7, 3, 8, 9)
## Testing group Sample_1: (6) vs (1, 2, 0, 4, 3, 5, 7, 8, 9)
head(markers_c6)
## Sample_8_p_val Sample_8_avg_log2FC Sample_8_pct.1 Sample_8_pct.2
## Oas3 6.830673e-17 4.9252370 0.417 0.020
## Mx1 8.286387e-18 4.5915557 0.500 0.028
## Rsad2 7.110979e-04 2.4915473 0.167 0.020
## Ifit1 3.452357e-21 4.3184725 0.500 0.022
## Slfn5 4.835250e-01 0.6795144 0.083 0.041
## Ifi204 1.802980e-11 3.8519952 0.333 0.020
## Sample_8_p_val_adj Sample_3_p_val Sample_3_avg_log2FC Sample_3_pct.1
## Oas3 1.209439e-12 3.018117e-68 6.911300 0.667
## Mx1 1.467188e-13 6.064391e-32 5.534447 0.583
## Rsad2 1.000000e+00 1.327859e-14 5.585285 0.333
## Ifit1 6.112742e-17 6.750795e-09 3.557636 0.250
## Slfn5 1.000000e+00 1.355641e-16 5.009240 0.417
## Ifi204 3.192356e-07 5.244589e-42 5.652052 0.750
## Sample_3_pct.2 Sample_3_p_val_adj Sample_2_p_val Sample_2_avg_log2FC
## Oas3 0.007 5.343878e-64 1.188691e-08 5.081359
## Mx1 0.020 1.073761e-27 1.672608e-22 5.315476
## Rsad2 0.016 2.351108e-10 4.195103e-56 6.316829
## Ifit1 0.016 1.195296e-04 8.426429e-07 4.291554
## Slfn5 0.022 2.400298e-12 8.867843e-04 4.615357
## Ifi204 0.025 9.286068e-38 1.355291e-30 5.122179
## Sample_2_pct.1 Sample_2_pct.2 Sample_2_p_val_adj Sample_6_p_val
## Oas3 0.2 0.009 2.104696e-04 9.328074e-07
## Mx1 0.5 0.019 2.961519e-18 1.044571e-23
## Rsad2 0.7 0.011 7.427849e-52 5.320054e-14
## Ifit1 0.2 0.013 1.491983e-02 1.349365e-44
## Slfn5 0.2 0.026 1.000000e+00 1.908852e-10
## Ifi204 0.6 0.019 2.399678e-26 6.849094e-18
## Sample_6_avg_log2FC Sample_6_pct.1 Sample_6_pct.2 Sample_6_p_val_adj
## Oas3 4.254094 0.2 0.013 1.651629e-02
## Mx1 4.533246 0.5 0.018 1.849517e-19
## Rsad2 6.417870 0.2 0.005 9.419687e-10
## Ifit1 6.333328 0.5 0.007 2.389186e-40
## Slfn5 4.377011 0.3 0.017 3.379813e-06
## Ifi204 4.303219 0.4 0.016 1.212701e-13
## Sample_1_p_val Sample_1_avg_log2FC Sample_1_pct.1 Sample_1_pct.2
## Oas3 1.610099e-50 4.636630 0.465 0.026
## Mx1 1.038707e-56 4.700852 0.535 0.032
## Rsad2 3.082650e-09 3.401074 0.140 0.015
## Ifit1 1.900743e-24 3.556260 0.279 0.021
## Slfn5 6.054133e-43 4.166827 0.395 0.022
## Ifi204 4.002328e-40 4.620068 0.349 0.018
## Sample_1_p_val_adj max_pval minimump_p_val
## Oas3 2.850842e-46 9.328074e-07 1.509059e-67
## Mx1 1.839134e-52 8.286387e-18 5.193534e-56
## Rsad2 5.458139e-05 7.110979e-04 2.097551e-55
## Ifit1 3.365456e-20 8.426429e-07 6.746826e-44
## Slfn5 1.071945e-38 4.835250e-01 3.027067e-42
## Ifi204 7.086523e-36 1.802980e-11 2.622294e-41
FeaturePlot(merged_object, features = c("Oas3", "Mx1", "Rsad2", "Ifit1", "Slfn5", "Ifi204"), min.cutoff = 'q10', reduction = "umap")

Cluster 7
markers_c7 <- FindConservedMarkers(merged_object, ident.1 = 7, grouping.var = "orig.ident")
## Testing group Sample_4: (7) vs (2, 1, 3, 0, 4, 5)
## Testing group Sample_7: (7) vs (4, 3, 5, 2, 1, 0, 6)
## Testing group Sample_8: (7) vs (4, 0, 5, 2, 1, 9, 3, 6, 8)
## Testing group Sample_3: (7) vs (5, 1, 2, 4, 3, 0, 6, 8, 9)
## Testing group Sample_2: (7) vs (1, 3, 2, 5, 0, 4, 6, 9, 8)
## Testing group Sample_6: (7) vs (0, 4, 1, 5, 2, 3, 8, 6, 9)
## Testing group Sample_1: (7) vs (1, 2, 0, 4, 3, 6, 5, 8, 9)
head(markers_c7)
## Sample_4_p_val Sample_4_avg_log2FC Sample_4_pct.1 Sample_4_pct.2
## Cd8b1 7.384557e-19 9.099437 0.8 0.008
## Cd8a 4.316244e-23 10.623875 0.8 0.000
## Themis 1.072380e-28 9.722424 1.0 0.000
## Pdcd1 1.988629e-15 5.137715 0.8 0.017
## Fam169b 8.688751e-16 6.134311 0.8 0.017
## Havcr2 1.462949e-17 7.612434 0.6 0.000
## Sample_4_p_val_adj Sample_7_p_val Sample_7_avg_log2FC Sample_7_pct.1
## Cd8b1 1.307510e-14 3.093571e-30 8.747824 0.714
## Cd8a 7.642341e-19 9.015139e-24 7.190432 0.714
## Themis 1.898756e-24 1.182509e-34 7.098802 0.714
## Pdcd1 3.521067e-11 3.616340e-11 5.067604 0.429
## Fam169b 1.538430e-11 1.506891e-34 6.698552 0.714
## Havcr2 2.590298e-13 6.318577e-12 7.861271 0.286
## Sample_7_pct.2 Sample_7_p_val_adj Sample_8_p_val Sample_8_avg_log2FC
## Cd8b1 0.008 5.477477e-26 3.006635e-61 7.929075
## Cd8a 0.016 1.596220e-19 4.491718e-64 7.762283
## Themis 0.004 2.093751e-30 2.601179e-87 8.421810
## Pdcd1 0.016 6.403092e-07 5.239091e-33 5.887830
## Fam169b 0.004 2.668102e-30 8.933740e-18 4.558485
## Havcr2 0.004 1.118767e-07 6.350868e-05 5.205317
## Sample_8_pct.1 Sample_8_pct.2 Sample_8_p_val_adj Sample_3_p_val
## Cd8b1 0.714 0.005 5.323548e-57 4.438016e-150
## Cd8a 0.857 0.008 7.953036e-60 7.587654e-83
## Themis 0.857 0.003 4.605647e-83 2.293814e-53
## Pdcd1 0.714 0.016 9.276335e-29 1.380913e-96
## Fam169b 0.429 0.011 1.581808e-13 3.526183e-25
## Havcr2 0.143 0.007 1.000000e+00 3.403776e-37
## Sample_3_avg_log2FC Sample_3_pct.1 Sample_3_pct.2 Sample_3_p_val_adj
## Cd8b1 10.047035 1.0 0.001 7.857951e-146
## Cd8a 7.923414 1.0 0.007 1.343470e-78
## Themis 8.262980 0.8 0.009 4.061428e-49
## Pdcd1 8.636668 0.8 0.002 2.445045e-92
## Fam169b 7.754720 0.4 0.005 6.243459e-21
## Havcr2 7.923650 0.6 0.007 6.026726e-33
## Sample_2_p_val Sample_2_avg_log2FC Sample_2_pct.1 Sample_2_pct.2
## Cd8b1 1.699719e-124 11.151140 0.8 0.001
## Cd8a 2.259005e-63 9.097452 0.8 0.007
## Themis 3.234194e-124 9.805786 0.8 0.001
## Pdcd1 5.559121e-89 7.410570 0.8 0.003
## Fam169b 1.963940e-103 7.929846 0.8 0.002
## Havcr2 3.050592e-23 6.884515 0.4 0.006
## Sample_2_p_val_adj Sample_6_p_val Sample_6_avg_log2FC Sample_6_pct.1
## Cd8b1 3.009522e-120 3.305878e-140 10.675293 1.0
## Cd8a 3.999794e-59 9.841570e-110 9.914961 1.0
## Themis 5.726464e-120 1.634437e-49 7.779124 0.8
## Pdcd1 9.842980e-85 1.292792e-57 7.058317 0.8
## Fam169b 3.477351e-99 6.488996e-51 6.978928 0.6
## Havcr2 5.401378e-19 7.059931e-90 7.896481 0.8
## Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val Sample_1_avg_log2FC
## Cd8b1 0.002 5.853388e-136 7.195229e-262 11.381910
## Cd8a 0.005 1.742548e-105 1.393163e-185 9.100785
## Themis 0.010 2.893934e-45 2.361902e-82 6.702105
## Pdcd1 0.008 2.289018e-53 1.017534e-103 7.332221
## Fam169b 0.005 1.148942e-46 8.955323e-14 4.909992
## Havcr2 0.003 1.250031e-85 6.149526e-30 6.878676
## Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj max_pval
## Cd8b1 1.000 0.002 1.273987e-257 7.384557e-19
## Cd8a 1.000 0.005 2.466735e-181 4.316244e-23
## Themis 0.667 0.007 4.181984e-78 1.072380e-28
## Pdcd1 0.444 0.001 1.801645e-99 3.616340e-11
## Fam169b 0.222 0.006 1.585630e-09 8.955323e-14
## Havcr2 0.333 0.006 1.088835e-25 6.350868e-05
## minimump_p_val
## Cd8b1 5.036660e-261
## Cd8a 9.752142e-185
## Themis 2.263936e-123
## Pdcd1 7.122735e-103
## Fam169b 1.374758e-102
## Havcr2 4.941951e-89
FeaturePlot(merged_object, features = c("Cd8b1", "Cd8a", "Themis", "Pdcd1", "Fam169b", "Havcr2"), min.cutoff = 'q10', reduction = "umap")

Cluster 8
markers_c8 <- FindConservedMarkers(merged_object, ident.1 = 8, grouping.var = "orig.ident")
## Warning: Identity: 8 not present in group Sample_4. Skipping Sample_4
## Warning: Identity: 8 not present in group Sample_7. Skipping Sample_7
## Testing group Sample_8: (8) vs (4, 0, 7, 5, 2, 1, 9, 3, 6)
## Testing group Sample_3: (8) vs (5, 1, 2, 4, 3, 0, 7, 6, 9)
## Warning: Sample_2 has fewer than 3 cells in Identity: 8. Skipping Sample_2
## Testing group Sample_6: (8) vs (0, 4, 1, 5, 2, 7, 3, 6, 9)
## Testing group Sample_1: (8) vs (1, 2, 0, 4, 3, 6, 5, 7, 9)
head(markers_c8)
## Sample_8_p_val Sample_8_avg_log2FC Sample_8_pct.1 Sample_8_pct.2
## Pclaf 6.210833e-102 8.769795 1.000 0.002
## E2f1 3.394789e-37 7.660554 0.667 0.005
## Kntc1 3.792734e-16 5.741189 0.333 0.003
## Nuf2 7.161577e-91 9.267855 0.667 0.000
## Spc24 7.161577e-91 9.720665 0.667 0.000
## Cdc6 1.036754e-81 7.851885 1.000 0.003
## Sample_8_p_val_adj Sample_3_p_val Sample_3_avg_log2FC Sample_3_pct.1
## Pclaf 1.099690e-97 7.406302e-81 10.665802 0.667
## E2f1 6.010813e-33 1.663237e-67 6.477685 1.000
## Kntc1 6.715414e-12 4.302516e-16 5.452783 0.333
## Nuf2 1.268029e-86 1.184810e-40 6.564405 0.667
## Spc24 1.268029e-86 2.664260e-21 7.475725 0.333
## Cdc6 1.835676e-77 3.035532e-31 8.660000 0.333
## Sample_3_pct.2 Sample_3_p_val_adj Sample_6_p_val Sample_6_avg_log2FC
## Pclaf 0.001 1.311360e-76 1.343420e-33 8.091555
## E2f1 0.006 2.944927e-63 6.546889e-36 6.724959
## Kntc1 0.004 7.618034e-12 3.057158e-42 6.253711
## Nuf2 0.005 2.097824e-36 3.132765e-09 5.314474
## Spc24 0.002 4.717339e-17 2.943684e-27 5.193999
## Cdc6 0.001 5.374713e-27 8.514606e-74 8.109040
## Sample_6_pct.1 Sample_6_pct.2 Sample_6_p_val_adj Sample_1_p_val
## Pclaf 0.500 0.007 2.378659e-29 1.196654e-67
## E2f1 0.667 0.012 1.159192e-31 6.911734e-95
## Kntc1 0.500 0.005 5.413004e-38 2.994900e-93
## Nuf2 0.167 0.003 5.546874e-05 6.957314e-07
## Spc24 0.500 0.009 5.212088e-23 4.872996e-51
## Cdc6 0.500 0.001 1.507596e-69 1.755461e-63
## Sample_1_avg_log2FC Sample_1_pct.1 Sample_1_pct.2 Sample_1_p_val_adj
## Pclaf 7.797064 0.5 0.005 2.118795e-63
## E2f1 7.000323 0.7 0.007 1.223792e-90
## Kntc1 7.335552 0.4 0.001 5.302770e-89
## Nuf2 4.493786 0.1 0.003 1.231862e-02
## Spc24 6.386756 0.4 0.004 8.628126e-47
## Cdc6 7.767022 0.3 0.001 3.108219e-59
## max_pval minimump_p_val
## Pclaf 1.343420e-33 2.484333e-101
## E2f1 6.546889e-36 2.764694e-94
## Kntc1 4.302516e-16 1.197960e-92
## Nuf2 6.957314e-07 2.864631e-90
## Spc24 2.664260e-21 2.864631e-90
## Cdc6 3.035532e-31 4.147015e-81
FeaturePlot(merged_object, features = c("Pclaf", "E2f1", "Kntc1", "Nuf2", "Spc24", "Cdc6"), min.cutoff = 'q10', reduction = "umap")

Cluster 9
markers_c9 <- FindConservedMarkers(merged_object, ident.1 = 9, grouping.var = "orig.ident")
## Warning: Identity: 9 not present in group Sample_4. Skipping Sample_4
## Warning: Identity: 9 not present in group Sample_7. Skipping Sample_7
## Testing group Sample_8: (9) vs (4, 0, 7, 5, 2, 1, 3, 6, 8)
## Testing group Sample_3: (9) vs (5, 1, 2, 4, 3, 0, 7, 6, 8)
## Testing group Sample_2: (9) vs (1, 3, 2, 5, 0, 4, 6, 7, 8)
## Testing group Sample_6: (9) vs (0, 4, 1, 5, 2, 7, 3, 8, 6)
## Warning: Sample_1 has fewer than 3 cells in Identity: 9. Skipping Sample_1
head(markers_c9)
## Sample_8_p_val Sample_8_avg_log2FC Sample_8_pct.1 Sample_8_pct.2
## Cd40 2.148713e-12 5.085492 0.25 0.003
## Clec4d 4.824541e-102 11.186172 0.75 0.000
## Sirpa 6.432115e-109 9.865326 1.00 0.002
## Pirb 4.824541e-102 10.333748 0.75 0.000
## Tgfbi 1.940361e-20 6.953467 0.50 0.008
## Il1b 3.176763e-69 11.295252 1.00 0.007
## Sample_8_p_val_adj Sample_3_p_val Sample_3_avg_log2FC Sample_3_pct.1
## Cd40 3.804511e-08 1.805919e-80 8.067746 0.667
## Clec4d 8.542332e-98 3.078131e-16 7.245606 0.333
## Sirpa 1.138870e-104 9.216522e-180 12.181824 1.000
## Pirb 8.542332e-98 9.216522e-180 11.720400 1.000
## Tgfbi 3.435603e-16 2.831514e-135 12.333369 1.000
## Il1b 5.624776e-65 1.713555e-135 14.178525 1.000
## Sample_3_pct.2 Sample_3_p_val_adj Sample_2_p_val Sample_2_avg_log2FC
## Cd40 0.001 3.197561e-76 1.211856e-22 7.263443
## Clec4d 0.004 5.450138e-12 1.854089e-193 13.258277
## Sirpa 0.000 1.631877e-175 1.498412e-145 9.568742
## Pirb 0.000 1.631877e-175 1.545385e-65 8.507640
## Tgfbi 0.001 5.013479e-131 9.069434e-146 10.084735
## Il1b 0.001 3.034020e-131 3.818238e-117 14.073994
## Sample_2_pct.1 Sample_2_pct.2 Sample_2_p_val_adj Sample_6_p_val
## Cd40 0.333 0.002 2.145712e-18 5.576688e-195
## Clec4d 1.000 0.000 3.282850e-189 8.533422e-34
## Sirpa 1.000 0.001 2.653089e-141 4.782747e-66
## Pirb 0.667 0.002 2.736259e-61 5.235819e-23
## Tgfbi 1.000 0.001 1.605834e-141 5.235819e-23
## Il1b 1.000 0.002 6.760572e-113 2.567745e-53
## Sample_6_avg_log2FC Sample_6_pct.1 Sample_6_pct.2 Sample_6_p_val_adj
## Cd40 12.564580 1.000 0.000 9.874084e-191
## Clec4d 8.557133 0.333 0.001 1.510928e-29
## Sirpa 10.429122 0.333 0.000 8.468331e-62
## Pirb 7.733398 0.333 0.002 9.270541e-19
## Tgfbi 8.048040 0.333 0.002 9.270541e-19
## Il1b 9.920172 0.667 0.003 4.546450e-49
## max_pval minimump_p_val
## Cd40 2.148713e-12 2.230675e-194
## Clec4d 3.078131e-16 7.416356e-193
## Sirpa 4.782747e-66 3.686609e-179
## Pirb 5.235819e-23 3.686609e-179
## Tgfbi 1.940361e-20 3.627774e-145
## Il1b 2.567745e-53 6.854218e-135
FeaturePlot(merged_object, features = c("Cd40", "Clec4d", "Sirpa", "Pirb", "Tgfbi", "Il1b"), min.cutoff = 'q10', reduction = "umap")
